diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 064a43e71..54263bb97 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -6,6 +6,9 @@ Bumped minimum version of pandas and numpy to fit **ixmp4**'s requirement. ## Individual updates +- [#819](https://github.com/IAMconsortium/pyam/pull/819) Speed up `categorize()` + in line with `validate()` improvements in + [#804](https://github.com/IAMconsortium/pyam/pull/804) - [#832](https://github.com/IAMconsortium/pyam/pull/832) Improve the test-suite for the ixmp4 integration - [#827](https://github.com/IAMconsortium/pyam/pull/827) Migrate to poetry for project management - [#830](https://github.com/IAMconsortium/pyam/pull/830) Implement more consistent logging behavior with **ixmp4** diff --git a/pyam/core.py b/pyam/core.py index ee14efb39..eb9aef851 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -33,9 +33,7 @@ _group_and_agg, ) from pyam.compute import IamComputeAccessor -from pyam.filter import ( - datetime_match, -) +from pyam.filter import datetime_match from pyam.index import ( append_index_col, get_index_levels, @@ -68,7 +66,7 @@ to_list, write_sheet, ) -from pyam.validation import _apply_criteria, _exclude_on_fail, _validate +from pyam.validation import _exclude_on_fail, _validate logger = logging.getLogger(__name__) @@ -919,7 +917,15 @@ def set_meta_from_data(self, name, method=None, column="value", **kwargs): self.set_meta(meta, name) def categorize( - self, name, value, criteria, color=None, marker=None, linestyle=None + self, + name, + value, + criteria: dict = None, + *, + color=None, + marker=None, + linestyle=None, + **kwargs, ): """Assign scenarios to a category according to specific criteria @@ -940,6 +946,7 @@ def categorize( assign a linestyle to this category for plotting """ # add plotting run control + for kind, arg in [ ("color", color), ("marker", marker), @@ -947,11 +954,17 @@ def categorize( ]: if arg: run_control().update({kind: {name: {value: arg}}}) - # find all data that matches categorization - rows = _apply_criteria(self._data, criteria, in_range=True, return_test="all") - idx = make_index(rows, cols=self.index.names) - if len(idx) == 0: + # find all data that matches categorization + # TODO: if validate returned an empty index, this check would be easier + not_valid = self.validate(criteria=criteria, **kwargs) + if not_valid is None: + idx = self.index + elif len(not_valid) < len(self.index): + idx = self.index.difference( + not_valid.set_index(["model", "scenario"]).index.unique() + ) + else: logger.info("No scenarios satisfy the criteria") return