Skip to content

Commit

Permalink
Merge pull request #682 from mabel-dev/FEATURE/#570-2
Browse files Browse the repository at this point in the history
#570 De Morgans Laws
  • Loading branch information
joocer authored Nov 24, 2022
2 parents 8b634c1 + e17365d commit 72d3d33
Show file tree
Hide file tree
Showing 10 changed files with 124 additions and 29 deletions.
3 changes: 3 additions & 0 deletions opteryx/managers/expression/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@


def format_expression(root):
if root is None:
return "null"

node_type = root.token_type

# LITERAL TYPES
Expand Down
1 change: 1 addition & 0 deletions opteryx/managers/planner/optimizer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from opteryx.shared.query_statistics import QueryStatistics

RULESET: list = [
actions.apply_demorgans_law,
actions.eliminate_negations,
actions.split_conjunctive_predicates, # run after eliminate_negations
actions.eliminate_fixed_function_evaluations, # run before constant evaluations
Expand Down
1 change: 1 addition & 0 deletions opteryx/managers/planner/optimizer/actions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# limitations under the License.

# fmt: off
from .action_apply_demorgans_law import apply_demorgans_law
from .action_constant_evaluations import eliminate_constant_evaluations
from .action_function_evaluations import eliminate_fixed_function_evaluations
from .action_defragment_pages import defragment_pages
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Optimization Rule - De Morgan's Laws
Type: Heuristic
Goal: Pre-position the expression tree for the actions that follow
"""
from opteryx import operators
from opteryx.managers.expression import ExpressionTreeNode, NodeType


def apply_demorgans_law(plan, properties):
    """
    Rewrite negated disjunctions in selection filters using De Morgan's law.

    This action aims to create more opportunity for other rules to act on. De Morgan's
    Laws allow the conversion of ORs to ANDs through negations:

        not (A or B) = (not A) and (not B)

    By converting ORs to ANDs the 'split conjunctive predicates' action will have more
    to act on. By changing how the negations are expressed, the 'eliminate negations'
    action will have more to act on.

    Parameters:
        plan: the query plan; this action calls `get_nodes_of_type` and
            `get_operator` on it.
        properties: query properties; not used by this action.

    Returns:
        The plan that was passed in, with the filter of each SelectionNode rewritten.
    """

    def update_expression_tree(node):
        """
        Walk an expression tree, dropping NESTED wrappers and rewriting every
        NOT (A OR B) as (NOT A) AND (NOT B). Returns the node which should take
        the place of the one passed in.
        """
        # tolerate an absent filter or child rather than failing on attribute access
        if node is None:
            return None

        # this is the main work of this action
        if node.token_type == NodeType.NESTED:
            return update_expression_tree(node.centre)
        if node.token_type == NodeType.NOT:
            centre_node = node.centre

            # break out of nesting, however many levels of brackets there are
            while centre_node is not None and centre_node.token_type == NodeType.NESTED:
                centre_node = centre_node.centre

            # do we have a NOT (a or b)?
            if centre_node is not None and centre_node.token_type == NodeType.OR:
                # rewrite to (not A) and (not B); the new NOT nodes are walked
                # (rather than just their children) so a side which is itself an
                # OR, as in NOT (a OR b OR c), is rewritten as well
                a_side = update_expression_tree(
                    ExpressionTreeNode(NodeType.NOT, centre=centre_node.left)
                )
                b_side = update_expression_tree(
                    ExpressionTreeNode(NodeType.NOT, centre=centre_node.right)
                )
                return ExpressionTreeNode(NodeType.AND, left=a_side, right=b_side)

        # below here is generic to walk the tree (None children are returned as-is)
        node.left = update_expression_tree(node.left)
        node.centre = update_expression_tree(node.centre)
        node.right = update_expression_tree(node.right)
        if node.parameters:
            # parameters may hold non-node values, which are passed through untouched
            node.parameters = [
                update_expression_tree(parameter)
                if isinstance(parameter, ExpressionTreeNode)
                else parameter
                for parameter in node.parameters
            ]
        return node

    # find the in-scope nodes (WHERE AND HAVING)
    selection_nodes = plan.get_nodes_of_type(operators.SelectionNode)

    # killer questions - if any aren't met, bail
    if selection_nodes is None:
        return plan

    # HAVING and WHERE are selection nodes
    for nid in selection_nodes:
        # get the node from the node_id
        operator = plan.get_operator(nid)
        operator.filter = update_expression_tree(operator.filter)

    return plan
Original file line number Diff line number Diff line change
Expand Up @@ -99,5 +99,7 @@ def update_expression_tree(node):
# get the node from the node_id
operator = plan.get_operator(nid)
operator.filter = update_expression_tree(operator.filter)
if operator.filter is None:
plan.remove_operator(nid)

return plan
14 changes: 12 additions & 2 deletions opteryx/operators/page_defragment_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,11 @@
page size.
"""
import time

from typing import Iterable

import pyarrow
from opteryx.exceptions import SqlError

from opteryx.operators import BasePlanNode

Expand Down Expand Up @@ -73,25 +75,33 @@ def execute(self) -> Iterable:

if page.num_rows > 0:

start = time.monotonic_ns()
# add what we've collected before to the table
if collected_rows: # pragma: no cover
self.statistics.page_merges += 1
page = pyarrow.concat_tables([collected_rows, page], promote=True)
collected_rows = None
self.statistics.time_defragmenting += time.monotonic_ns() - start

# work out some stats about what we have
page_bytes = page.nbytes
page_records = page.num_rows

# if we're more than double the target size, let's do something
if page_bytes > (PAGE_SIZE * HIGH_WATER): # pragma: no cover
start = time.monotonic_ns()

average_record_size = page_bytes / page_records
new_row_count = int(PAGE_SIZE / average_record_size)
row_counter += new_row_count
self.statistics.page_splits += 1
yield page.slice(offset=0, length=new_row_count)
new_page = page.slice(offset=0, length=new_row_count)
at_least_one_page = True
collected_rows = page.slice(offset=new_row_count)

self.statistics.time_defragmenting += time.monotonic_ns() - start

yield new_page
# if we're less than 75% of the page size, save hold what we have so
# far and go collect the next page
elif page_bytes < (PAGE_SIZE * LOW_WATER):
Expand Down
23 changes: 2 additions & 21 deletions opteryx/operators/selection_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

from opteryx.attribute_types import TOKEN_TYPES
from opteryx.exceptions import SqlError
from opteryx.managers.expression import evaluate
from opteryx.managers.expression import evaluate, format_expression
from opteryx.models import QueryProperties
from opteryx.operators import BasePlanNode

Expand All @@ -35,29 +35,10 @@ class SelectionNode(BasePlanNode):
def __init__(self, properties: QueryProperties, **config):
super().__init__(properties=properties)
self.filter = config.get("filter")
self._unfurled_filter = None
self._mapped_filter = None

@property
def config(self): # pragma: no cover
def _inner_config(predicate):
if isinstance(predicate, tuple):
if len(predicate) > 1 and predicate[1] == TOKEN_TYPES.IDENTIFIER:
return f"`{predicate[0]}`"
if len(predicate) > 1 and predicate[1] == TOKEN_TYPES.VARCHAR:
return f'"{predicate[0]}"'
if len(predicate) == 2:
if predicate[0] == "Not":
return f"NOT {_inner_config(predicate[1])}"
return f"{predicate[0]}"
return "(" + " ".join(_inner_config(p) for p in predicate) + ")"
if isinstance(predicate, list):
if len(predicate) == 1:
return _inner_config(predicate[0])
return "[" + ",".join(_inner_config(p) for p in predicate) + "]"
return f"{predicate}"

return _inner_config(self.filter)
return format_expression(self.filter)

@property
def name(self): # pragma: no cover
Expand Down
12 changes: 7 additions & 5 deletions opteryx/shared/query_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,12 @@ def __init__(self):

# time spent on various steps
self.time_planning: int = 0
self.time_selecting: float = 0
self.time_aggregating: float = 0
self.time_ordering: float = 0
self.time_evaluating: float = 0
self.time_optimizing: float = 0
self.time_selecting: int = 0
self.time_aggregating: int = 0
self.time_ordering: int = 0
self.time_evaluating: int = 0
self.time_optimizing: int = 0
self.time_defragmenting: int = 0

self.start_time: int = 0
self.end_time: int = 0
Expand Down Expand Up @@ -120,6 +121,7 @@ def as_dict(self):
"time_ordering": self._ns_to_s(self.time_ordering),
"time_evaluating": self._ns_to_s(self.time_evaluating),
"time_optimizing": self._ns_to_s(self.time_optimizing),
"time_defragmenting": self._ns_to_s(self.time_defragmenting),
"partitions_found": self.partitions_found,
"partitions_scanned": self.partitions_scanned,
"partitions_read": self.partitions_read,
Expand Down
2 changes: 1 addition & 1 deletion opteryx/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@
"""

# __version__ = "0.4.0-alpha.6"
__version__ = "0.7.0-beta.3"
__version__ = "0.7.0-beta.4"
5 changes: 5 additions & 0 deletions tests/sql_battery/test_shapes_and_errors_battery.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,11 @@
("SELECT id FROM $planets WHERE NOT NOT (id < 5 AND id = 3)", 1, 1, None),
("SELECT id FROM $planets WHERE NOT id = 2 AND NOT NOT (id < 5 AND id = 3)", 1, 1, None),
("SET enable_optimizer = false; SELECT id FROM $planets WHERE NOT id = 2 AND NOT NOT (id < 5 AND id = 3)", 1, 1, None),
("SELECT * FROM $planets WHERE NOT(id = 9 OR id = 8)", 7, 20, None),
("SELECT * FROM $planets WHERE NOT(id = 9 OR id = 8) OR True", 9, 20, None),
("SELECT * FROM $planets WHERE NOT(id = 9 OR 8 = 8)", 8, 20, None),
("SELECT * FROM $planets WHERE 1 = 1", 9, 20, None),
("SELECT * FROM $planets WHERE NOT 1 = 2", 9, 20, None),

("SHOW CREATE TABLE $planets", 1, 1, None),
("SHOW CREATE TABLE $satellites", 1, 1, None),
Expand Down

0 comments on commit 72d3d33

Please sign in to comment.