From 9c8d273acddcef0b95cbd31cca59bb9a1586d701 Mon Sep 17 00:00:00 2001 From: Christopher Whelan Date: Wed, 23 Jan 2019 23:36:34 -0800 Subject: [PATCH 1/2] CLN: use Categorical.from_codes in benchmark setup for performance --- asv_bench/benchmarks/categoricals.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index e5dab0cb066aa..3a483e9ea7cac 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -223,11 +223,16 @@ class CategoricalSlicing(object): def setup(self, index): N = 10**6 - values = list('a' * N + 'b' * N + 'c' * N) + categories = ['a', 'b', 'c'] + values = [0] * N + [1] * N + [2] * N indices = { - 'monotonic_incr': pd.Categorical(values), - 'monotonic_decr': pd.Categorical(reversed(values)), - 'non_monotonic': pd.Categorical(list('abc' * N))} + 'monotonic_incr': pd.Categorical.from_codes(values, + categories=categories), + 'monotonic_decr': pd.Categorical.from_codes(list(reversed(values)), + categories=categories), + 'non_monotonic': pd.Categorical.from_codes([0, 1, 2] * N, + categories=categories) + } self.data = indices[index] self.scalar = 10000 From 704ee8f796bc7549f4f72fd1d742ee7cd18dc162 Mon Sep 17 00:00:00 2001 From: Christopher Whelan Date: Wed, 23 Jan 2019 23:43:03 -0800 Subject: [PATCH 2/2] CLN: only construct case necessary for benchmark --- asv_bench/benchmarks/categoricals.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 3a483e9ea7cac..4b5b2848f7e0f 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -225,15 +225,17 @@ def setup(self, index): N = 10**6 categories = ['a', 'b', 'c'] values = [0] * N + [1] * N + [2] * N - indices = { - 'monotonic_incr': pd.Categorical.from_codes(values, - categories=categories), - 
'monotonic_decr': pd.Categorical.from_codes(list(reversed(values)), - categories=categories), - 'non_monotonic': pd.Categorical.from_codes([0, 1, 2] * N, - categories=categories) - } - self.data = indices[index] + if index == 'monotonic_incr': + self.data = pd.Categorical.from_codes(values, + categories=categories) + elif index == 'monotonic_decr': + self.data = pd.Categorical.from_codes(list(reversed(values)), + categories=categories) + elif index == 'non_monotonic': + self.data = pd.Categorical.from_codes([0, 1, 2] * N, + categories=categories) + else: + raise ValueError('Invalid index param: {}'.format(index)) self.scalar = 10000 self.list = list(range(10000))