From 3273950a3a2476ab067520d596d016663cb8daca Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Tue, 25 Oct 2022 22:12:39 -0400 Subject: [PATCH 01/16] Add a benchmark for dataset element insertion --- asv_bench/benchmarks/dataset_creation.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 asv_bench/benchmarks/dataset_creation.py diff --git a/asv_bench/benchmarks/dataset_creation.py b/asv_bench/benchmarks/dataset_creation.py new file mode 100644 index 00000000000..7d731f86085 --- /dev/null +++ b/asv_bench/benchmarks/dataset_creation.py @@ -0,0 +1,18 @@ +import xarray as xr + + +class Creation: + def setup(self): + # Everybody is lazy loading these days + # so lets force modules to get instantiated here, instead of + # in the benchmark + dummy_dataset = xr.Dataset() + dummy_dataset['a'] = 1 + dummy_dataset['b'] = 1 + + self.dataset = xr.Dataset() + + def time_dataset_creation(self): + dataset = self.dataset + for i in range(1000): + dataset[f"var{i}"] = i From 151b2a2729ebc12b63d49901feec788f02bb9224 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Oct 2022 02:14:17 +0000 Subject: [PATCH 02/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- asv_bench/benchmarks/dataset_creation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/dataset_creation.py b/asv_bench/benchmarks/dataset_creation.py index 7d731f86085..94e007c2a71 100644 --- a/asv_bench/benchmarks/dataset_creation.py +++ b/asv_bench/benchmarks/dataset_creation.py @@ -7,8 +7,8 @@ def setup(self): # so lets force modules to get instantiated here, instead of # in the benchmark dummy_dataset = xr.Dataset() - dummy_dataset['a'] = 1 - dummy_dataset['b'] = 1 + dummy_dataset["a"] = 1 + dummy_dataset["b"] = 1 self.dataset = xr.Dataset() From 2837d95ab941f7df85efadbef2460150c80e3d87 Mon Sep 17 00:00:00 2001 From: 
Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 26 Oct 2022 06:46:07 +0200 Subject: [PATCH 03/16] Update asv_bench/benchmarks/dataset_creation.py --- asv_bench/benchmarks/dataset_creation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/dataset_creation.py b/asv_bench/benchmarks/dataset_creation.py index 94e007c2a71..f7cb4638e1b 100644 --- a/asv_bench/benchmarks/dataset_creation.py +++ b/asv_bench/benchmarks/dataset_creation.py @@ -14,5 +14,5 @@ def setup(self): def time_dataset_creation(self): dataset = self.dataset - for i in range(1000): + for i in range(100): dataset[f"var{i}"] = i From cbd29aafbce7d6a569fbd31354dcd0ff5be2f102 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Wed, 26 Oct 2022 08:08:05 -0400 Subject: [PATCH 04/16] Rework the benchmark --- asv_bench/benchmarks/dataset_creation.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/asv_bench/benchmarks/dataset_creation.py b/asv_bench/benchmarks/dataset_creation.py index f7cb4638e1b..4e27657bcce 100644 --- a/asv_bench/benchmarks/dataset_creation.py +++ b/asv_bench/benchmarks/dataset_creation.py @@ -1,18 +1,18 @@ import xarray as xr +from . 
import parameterized class Creation: - def setup(self): - # Everybody is lazy loading these days - # so lets force modules to get instantiated here, instead of - # in the benchmark - dummy_dataset = xr.Dataset() - dummy_dataset["a"] = 1 - dummy_dataset["b"] = 1 + def setup(self, elements): + self.datasets = {} + # Dictionary insertion is fast(er) than xarray.Dataser insertion + d = {} + for i in range(elements): + d[f"var{i}"] = i + self.dataset = xr.merge([d]) - self.dataset = xr.Dataset() - - def time_dataset_creation(self): + @parameterized(["elements"], [(0, 10, 100, 1000)]) + def time_dataset_creation(self, elements): dataset = self.dataset - for i in range(100): - dataset[f"var{i}"] = i + for i in range(5): + dataset[f"new_var{i}"] = i From c7b3fb98e87b1441e3ee640b7d9f7d8e29547998 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Wed, 26 Oct 2022 08:10:09 -0400 Subject: [PATCH 05/16] lint before the bot --- asv_bench/benchmarks/dataset_creation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/asv_bench/benchmarks/dataset_creation.py b/asv_bench/benchmarks/dataset_creation.py index 4e27657bcce..ab9b8125fa0 100644 --- a/asv_bench/benchmarks/dataset_creation.py +++ b/asv_bench/benchmarks/dataset_creation.py @@ -1,4 +1,5 @@ import xarray as xr + from . 
import parameterized From e2fbec49cfbc3e613469d0d59fef8c59d61b3092 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Wed, 26 Oct 2022 12:19:42 -0400 Subject: [PATCH 06/16] Update asv_bench/benchmarks/dataset_creation.py Co-authored-by: Deepak Cherian --- asv_bench/benchmarks/dataset_creation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/asv_bench/benchmarks/dataset_creation.py b/asv_bench/benchmarks/dataset_creation.py index ab9b8125fa0..95e31f85d45 100644 --- a/asv_bench/benchmarks/dataset_creation.py +++ b/asv_bench/benchmarks/dataset_creation.py @@ -4,6 +4,8 @@ class Creation: + params = [0, 10, 100, 1000] + def setup(self, elements): self.datasets = {} # Dictionary insertion is fast(er) than xarray.Dataser insertion From b6bb7a8778d6647c55dad51a0efb19a0bc4ac1f7 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Wed, 26 Oct 2022 12:19:49 -0400 Subject: [PATCH 07/16] Update asv_bench/benchmarks/dataset_creation.py Co-authored-by: Deepak Cherian --- asv_bench/benchmarks/dataset_creation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/asv_bench/benchmarks/dataset_creation.py b/asv_bench/benchmarks/dataset_creation.py index 95e31f85d45..c589e3b73e7 100644 --- a/asv_bench/benchmarks/dataset_creation.py +++ b/asv_bench/benchmarks/dataset_creation.py @@ -14,7 +14,6 @@ def setup(self, elements): d[f"var{i}"] = i self.dataset = xr.merge([d]) - @parameterized(["elements"], [(0, 10, 100, 1000)]) def time_dataset_creation(self, elements): dataset = self.dataset for i in range(5): From ee83f2b6ad812e69228d3aaa4009758c672101bb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Oct 2022 16:21:35 +0000 Subject: [PATCH 08/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- asv_bench/benchmarks/dataset_creation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/dataset_creation.py 
b/asv_bench/benchmarks/dataset_creation.py index c589e3b73e7..cab1a5fd8ca 100644 --- a/asv_bench/benchmarks/dataset_creation.py +++ b/asv_bench/benchmarks/dataset_creation.py @@ -5,7 +5,7 @@ class Creation: params = [0, 10, 100, 1000] - + def setup(self, elements): self.datasets = {} # Dictionary insertion is fast(er) than xarray.Dataser insertion From 7627fc95ade6e547810001c3cca78d3345df4720 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Wed, 26 Oct 2022 12:20:43 -0400 Subject: [PATCH 09/16] Lint --- asv_bench/benchmarks/dataset_creation.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/dataset_creation.py b/asv_bench/benchmarks/dataset_creation.py index cab1a5fd8ca..e55c423aa48 100644 --- a/asv_bench/benchmarks/dataset_creation.py +++ b/asv_bench/benchmarks/dataset_creation.py @@ -1,9 +1,7 @@ import xarray as xr -from . import parameterized - -class Creation: +class DatasetOperations: params = [0, 10, 100, 1000] def setup(self, elements): @@ -14,7 +12,7 @@ def setup(self, elements): d[f"var{i}"] = i self.dataset = xr.merge([d]) - def time_dataset_creation(self, elements): + def time_dataset_insertion(self, elements): dataset = self.dataset for i in range(5): dataset[f"new_var{i}"] = i From ba2e7ad34e464d8c4578ca6b35b2fe442897269c Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Wed, 26 Oct 2022 12:21:25 -0400 Subject: [PATCH 10/16] Rename the benchmark --- .../{dataset_creation.py => dataset_in_memory_operation.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename asv_bench/benchmarks/{dataset_creation.py => dataset_in_memory_operation.py} (100%) diff --git a/asv_bench/benchmarks/dataset_creation.py b/asv_bench/benchmarks/dataset_in_memory_operation.py similarity index 100% rename from asv_bench/benchmarks/dataset_creation.py rename to asv_bench/benchmarks/dataset_in_memory_operation.py From 33b62007986b660b500cab381e8051f653fb2a3a Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Wed, 26 Oct 2022 
12:22:27 -0400 Subject: [PATCH 11/16] Rename benchmark --- asv_bench/benchmarks/dataset_in_memory_operation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/dataset_in_memory_operation.py b/asv_bench/benchmarks/dataset_in_memory_operation.py index e55c423aa48..23462b3c8fb 100644 --- a/asv_bench/benchmarks/dataset_in_memory_operation.py +++ b/asv_bench/benchmarks/dataset_in_memory_operation.py @@ -1,7 +1,7 @@ import xarray as xr -class DatasetOperations: +class DatasetInMemoryOperations: params = [0, 10, 100, 1000] def setup(self, elements): From b7582ae9d4df8f146d09a2d3e1fdfdb2360d77f8 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Wed, 26 Oct 2022 13:52:54 -0400 Subject: [PATCH 12/16] Update asv_bench/benchmarks/dataset_in_memory_operation.py Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- asv_bench/benchmarks/dataset_in_memory_operation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/dataset_in_memory_operation.py b/asv_bench/benchmarks/dataset_in_memory_operation.py index 23462b3c8fb..0e9b63696c0 100644 --- a/asv_bench/benchmarks/dataset_in_memory_operation.py +++ b/asv_bench/benchmarks/dataset_in_memory_operation.py @@ -6,7 +6,7 @@ class DatasetInMemoryOperations: def setup(self, elements): self.datasets = {} - # Dictionary insertion is fast(er) than xarray.Dataser insertion + # Dictionary insertion is fast(er) than xarray.Dataset insertion d = {} for i in range(elements): d[f"var{i}"] = i From ee0cc92c7ff667688e4e647c7d1710c483cd41d8 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 26 Oct 2022 13:05:38 -0600 Subject: [PATCH 13/16] Update and rename dataset_in_memory_operation.py to merge.py --- .../benchmarks/{dataset_in_memory_operation.py => merge.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename asv_bench/benchmarks/{dataset_in_memory_operation.py => merge.py} (83%) diff --git 
a/asv_bench/benchmarks/dataset_in_memory_operation.py b/asv_bench/benchmarks/merge.py similarity index 83% rename from asv_bench/benchmarks/dataset_in_memory_operation.py rename to asv_bench/benchmarks/merge.py index 0e9b63696c0..656837addd8 100644 --- a/asv_bench/benchmarks/dataset_in_memory_operation.py +++ b/asv_bench/benchmarks/merge.py @@ -1,7 +1,7 @@ import xarray as xr -class DatasetInMemoryOperations: +class DatasetAddVariable: params = [0, 10, 100, 1000] def setup(self, elements): @@ -12,7 +12,7 @@ def setup(self, elements): d[f"var{i}"] = i self.dataset = xr.merge([d]) - def time_dataset_insertion(self, elements): + def time_variable_insertion(self): dataset = self.dataset for i in range(5): dataset[f"new_var{i}"] = i From e328ebfc1ef848a700569d19099d1c386f25ea84 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Wed, 26 Oct 2022 17:33:01 -0400 Subject: [PATCH 14/16] add back elements --- asv_bench/benchmarks/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/merge.py b/asv_bench/benchmarks/merge.py index 656837addd8..bc60e75f33d 100644 --- a/asv_bench/benchmarks/merge.py +++ b/asv_bench/benchmarks/merge.py @@ -12,7 +12,7 @@ def setup(self, elements): d[f"var{i}"] = i self.dataset = xr.merge([d]) - def time_variable_insertion(self): + def time_variable_insertion(self, elements): dataset = self.dataset for i in range(5): dataset[f"new_var{i}"] = i From 4fe2deb90d6153de9bae5a85821ef42aaccc3f46 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Thu, 27 Oct 2022 11:07:51 -0400 Subject: [PATCH 15/16] Only add a single variable --- asv_bench/benchmarks/merge.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/merge.py b/asv_bench/benchmarks/merge.py index bc60e75f33d..e14c1e92a18 100644 --- a/asv_bench/benchmarks/merge.py +++ b/asv_bench/benchmarks/merge.py @@ -14,5 +14,4 @@ def setup(self, elements): def time_variable_insertion(self, elements): dataset = self.dataset - for i 
in range(5): - dataset[f"new_var{i}"] = i + dataset[f"new_var"] = 0 From 2fdf774d51cb5d7b9e7e20b58c601b3029a09b10 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Thu, 27 Oct 2022 11:12:28 -0400 Subject: [PATCH 16/16] Give the parameter a name --- asv_bench/benchmarks/merge.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/asv_bench/benchmarks/merge.py b/asv_bench/benchmarks/merge.py index e14c1e92a18..9d092e3f6f3 100644 --- a/asv_bench/benchmarks/merge.py +++ b/asv_bench/benchmarks/merge.py @@ -2,16 +2,17 @@ class DatasetAddVariable: - params = [0, 10, 100, 1000] + param_names = ["existing_elements"] + params = [[0, 10, 100, 1000]] - def setup(self, elements): + def setup(self, existing_elements): self.datasets = {} # Dictionary insertion is fast(er) than xarray.Dataset insertion d = {} - for i in range(elements): + for i in range(existing_elements): d[f"var{i}"] = i self.dataset = xr.merge([d]) - def time_variable_insertion(self, elements): + def time_variable_insertion(self, existing_elements): dataset = self.dataset - dataset[f"new_var"] = 0 + dataset["new_var"] = 0