diff --git a/opteryx/managers/expression/__init__.py b/opteryx/managers/expression/__init__.py index 30b84df9d..37a6723f7 100644 --- a/opteryx/managers/expression/__init__.py +++ b/opteryx/managers/expression/__init__.py @@ -42,6 +42,9 @@ def format_expression(root): + if root is None: + return "null" + node_type = root.token_type # LITERAL TYPES diff --git a/opteryx/managers/planner/optimizer/__init__.py b/opteryx/managers/planner/optimizer/__init__.py index deb94bfff..76cdde00d 100644 --- a/opteryx/managers/planner/optimizer/__init__.py +++ b/opteryx/managers/planner/optimizer/__init__.py @@ -15,6 +15,7 @@ from opteryx.shared.query_statistics import QueryStatistics RULESET: list = [ + actions.apply_demorgans_law, actions.eliminate_negations, actions.split_conjunctive_predicates, # run after eliminate_negations actions.eliminate_fixed_function_evaluations, # run before constant evaluations diff --git a/opteryx/managers/planner/optimizer/actions/__init__.py b/opteryx/managers/planner/optimizer/actions/__init__.py index d3808f821..7ce0b1b27 100644 --- a/opteryx/managers/planner/optimizer/actions/__init__.py +++ b/opteryx/managers/planner/optimizer/actions/__init__.py @@ -11,6 +11,7 @@ # limitations under the License. # fmt: off +from .action_apply_demorgans_law import apply_demorgans_law from .action_constant_evaluations import eliminate_constant_evaluations from .action_function_evaluations import eliminate_fixed_function_evaluations from .action_defragment_pages import defragment_pages diff --git a/opteryx/managers/planner/optimizer/actions/action_apply_demorgans_law.py b/opteryx/managers/planner/optimizer/actions/action_apply_demorgans_law.py new file mode 100644 index 000000000..14cf8d381 --- /dev/null +++ b/opteryx/managers/planner/optimizer/actions/action_apply_demorgans_law.py @@ -0,0 +1,90 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
def apply_demorgans_law(plan, properties):
    """
    Rewrite negated disjunctions in selection filters using De Morgan's law.

    This action aims to create more opportunity for other rules to act on. The
    core of it is the identity:

        NOT (A OR B)  =  (NOT A) AND (NOT B)

    By converting ORs to ANDs the 'split conjunctive predicates' action will
    have more to act on. By changing how the negations are expressed, the
    'eliminate negations' action will have more to act on.

    Parameters:
        plan:
            The query plan. Every SelectionNode operator in it (WHERE and
            HAVING) has its `filter` expression tree rewritten in place.
        properties:
            Query properties; accepted so all optimizer actions share one
            signature, not used by this action.

    Returns:
        The same plan object that was passed in.
    """

    def update_expression_tree(node):
        """
        Return the tree rooted at `node` with De Morgan's law applied.

        NESTED (parenthesis) nodes are dropped along the way and every
        `NOT (a OR b)` found is replaced by `(NOT a) AND (NOT b)`. Nodes that
        are not replaced are updated in place and returned. A missing node
        (None) is passed through untouched.
        """
        if node is None:
            return None

        # parentheses carry no meaning once the expression is a tree
        if node.token_type == NodeType.NESTED:
            return update_expression_tree(node.centre)

        # this is the main work of this action
        if node.token_type == NodeType.NOT:
            centre_node = node.centre

            # break out of nesting, however many levels deep it goes, so
            # NOT ((a OR b)) is recognised the same as NOT (a OR b)
            while (
                centre_node is not None
                and centre_node.token_type == NodeType.NESTED
            ):
                centre_node = centre_node.centre

            # do we have a NOT (a OR b)?
            if centre_node is not None and centre_node.token_type == NodeType.OR:
                # rewrite to (NOT a) AND (NOT b); the new NOT nodes are fed
                # back through this function so a negation pushed onto another
                # OR - as in NOT (a OR (b OR c)) - is rewritten as well
                a_side = update_expression_tree(
                    ExpressionTreeNode(NodeType.NOT, centre=centre_node.left)
                )
                b_side = update_expression_tree(
                    ExpressionTreeNode(NodeType.NOT, centre=centre_node.right)
                )
                return ExpressionTreeNode(NodeType.AND, left=a_side, right=b_side)

        # below here is generic to walk the tree
        node.left = update_expression_tree(node.left)
        node.centre = update_expression_tree(node.centre)
        node.right = update_expression_tree(node.right)
        if node.parameters:
            # parameters may mix expression nodes with plain values; only the
            # expression nodes can be walked
            node.parameters = [
                update_expression_tree(parameter)
                if isinstance(parameter, ExpressionTreeNode)
                else parameter
                for parameter in node.parameters
            ]
        return node

    # find the in-scope nodes (WHERE and HAVING are both selection nodes)
    selection_nodes = plan.get_nodes_of_type(operators.SelectionNode)

    # killer questions - if any aren't met, bail
    if selection_nodes is None:
        return plan

    for nid in selection_nodes:
        # get the node from the node_id
        operator = plan.get_operator(nid)
        operator.filter = update_expression_tree(operator.filter)

    return plan
9e1f6495a..f8ec6bc9d 100644 --- a/opteryx/operators/page_defragment_node.py +++ b/opteryx/operators/page_defragment_node.py @@ -34,9 +34,11 @@ page size. """ +import time + from typing import Iterable + import pyarrow -from opteryx.exceptions import SqlError from opteryx.operators import BasePlanNode @@ -73,11 +75,13 @@ def execute(self) -> Iterable: if page.num_rows > 0: + start = time.monotonic_ns() # add what we've collected before to the table if collected_rows: # pragma: no cover self.statistics.page_merges += 1 page = pyarrow.concat_tables([collected_rows, page], promote=True) collected_rows = None + self.statistics.time_defragmenting += time.monotonic_ns() - start # work out some stats about what we have page_bytes = page.nbytes @@ -85,13 +89,19 @@ def execute(self) -> Iterable: # if we're more than double the target size, let's do something if page_bytes > (PAGE_SIZE * HIGH_WATER): # pragma: no cover + start = time.monotonic_ns() + average_record_size = page_bytes / page_records new_row_count = int(PAGE_SIZE / average_record_size) row_counter += new_row_count self.statistics.page_splits += 1 - yield page.slice(offset=0, length=new_row_count) + new_page = page.slice(offset=0, length=new_row_count) at_least_one_page = True collected_rows = page.slice(offset=new_row_count) + + self.statistics.time_defragmenting += time.monotonic_ns() - start + + yield new_page # if we're less than 75% of the page size, save hold what we have so # far and go collect the next page elif page_bytes < (PAGE_SIZE * LOW_WATER): diff --git a/opteryx/operators/selection_node.py b/opteryx/operators/selection_node.py index 9abb186b0..6b6b60850 100644 --- a/opteryx/operators/selection_node.py +++ b/opteryx/operators/selection_node.py @@ -26,7 +26,7 @@ from opteryx.attribute_types import TOKEN_TYPES from opteryx.exceptions import SqlError -from opteryx.managers.expression import evaluate +from opteryx.managers.expression import evaluate, format_expression from opteryx.models import 
QueryProperties from opteryx.operators import BasePlanNode @@ -35,29 +35,10 @@ class SelectionNode(BasePlanNode): def __init__(self, properties: QueryProperties, **config): super().__init__(properties=properties) self.filter = config.get("filter") - self._unfurled_filter = None - self._mapped_filter = None @property def config(self): # pragma: no cover - def _inner_config(predicate): - if isinstance(predicate, tuple): - if len(predicate) > 1 and predicate[1] == TOKEN_TYPES.IDENTIFIER: - return f"`{predicate[0]}`" - if len(predicate) > 1 and predicate[1] == TOKEN_TYPES.VARCHAR: - return f'"{predicate[0]}"' - if len(predicate) == 2: - if predicate[0] == "Not": - return f"NOT {_inner_config(predicate[1])}" - return f"{predicate[0]}" - return "(" + " ".join(_inner_config(p) for p in predicate) + ")" - if isinstance(predicate, list): - if len(predicate) == 1: - return _inner_config(predicate[0]) - return "[" + ",".join(_inner_config(p) for p in predicate) + "]" - return f"{predicate}" - - return _inner_config(self.filter) + return format_expression(self.filter) @property def name(self): # pragma: no cover diff --git a/opteryx/shared/query_statistics.py b/opteryx/shared/query_statistics.py index b67a3bd14..28e35275d 100644 --- a/opteryx/shared/query_statistics.py +++ b/opteryx/shared/query_statistics.py @@ -62,11 +62,12 @@ def __init__(self): # time spent on various steps self.time_planning: int = 0 - self.time_selecting: float = 0 - self.time_aggregating: float = 0 - self.time_ordering: float = 0 - self.time_evaluating: float = 0 - self.time_optimizing: float = 0 + self.time_selecting: int = 0 + self.time_aggregating: int = 0 + self.time_ordering: int = 0 + self.time_evaluating: int = 0 + self.time_optimizing: int = 0 + self.time_defragmenting: int = 0 self.start_time: int = 0 self.end_time: int = 0 @@ -120,6 +121,7 @@ def as_dict(self): "time_ordering": self._ns_to_s(self.time_ordering), "time_evaluating": self._ns_to_s(self.time_evaluating), "time_optimizing": 
self._ns_to_s(self.time_optimizing), + "time_defragmenting": self._ns_to_s(self.time_defragmenting), "partitions_found": self.partitions_found, "partitions_scanned": self.partitions_scanned, "partitions_read": self.partitions_read, diff --git a/opteryx/version.py b/opteryx/version.py index d8ddb5190..9a104a9b6 100644 --- a/opteryx/version.py +++ b/opteryx/version.py @@ -17,4 +17,4 @@ """ # __version__ = "0.4.0-alpha.6" -__version__ = "0.7.0-beta.3" +__version__ = "0.7.0-beta.4" diff --git a/tests/sql_battery/test_shapes_and_errors_battery.py b/tests/sql_battery/test_shapes_and_errors_battery.py index 66331edfb..1a6420cd5 100644 --- a/tests/sql_battery/test_shapes_and_errors_battery.py +++ b/tests/sql_battery/test_shapes_and_errors_battery.py @@ -626,6 +626,11 @@ ("SELECT id FROM $planets WHERE NOT NOT (id < 5 AND id = 3)", 1, 1, None), ("SELECT id FROM $planets WHERE NOT id = 2 AND NOT NOT (id < 5 AND id = 3)", 1, 1, None), ("SET enable_optimizer = false; SELECT id FROM $planets WHERE NOT id = 2 AND NOT NOT (id < 5 AND id = 3)", 1, 1, None), + ("SELECT * FROM $planets WHERE NOT(id = 9 OR id = 8)", 7, 20, None), + ("SELECT * FROM $planets WHERE NOT(id = 9 OR id = 8) OR True", 9, 20, None), + ("SELECT * FROM $planets WHERE NOT(id = 9 OR 8 = 8)", 8, 20, None), + ("SELECT * FROM $planets WHERE 1 = 1", 9, 20, None), + ("SELECT * FROM $planets WHERE NOT 1 = 2", 9, 20, None), ("SHOW CREATE TABLE $planets", 1, 1, None), ("SHOW CREATE TABLE $satellites", 1, 1, None),