Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#1698 #1705

Merged
merged 2 commits into from
May 29, 2024
Merged

#1698 #1705

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion opteryx/__version__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__build__ = 533
__build__ = 534

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
8 changes: 4 additions & 4 deletions opteryx/functions/other_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def list_contains_all(array, items):
return set(array).issuperset(items)


def search(array, item):
def search(array, item, ignore_case=[True]):
"""
`search` provides a way to look for values across different field types. Rather
than doing a LIKE on a string or an IN on a list, `search` adapts to the field type.
Expand All @@ -82,17 +82,17 @@ def search(array, item):
else:
return numpy.array([False], dtype=numpy.bool_)

if array_type == str:
if array_type in (str, bytes):
# return True if the value is in the string
results_mask = compute.match_substring(array, pattern=item, ignore_case=True)
results_mask = compute.match_substring(array, pattern=item, ignore_case=ignore_case[0])
elif array_type == numpy.ndarray:
# converting to a set is faster for a handful of items, which is almost certainly
# what we're working with here - note that compute.index is about 50x slower
results_mask = numpy.array([item in set(record) for record in array], dtype=numpy.bool_)
elif array_type == dict:
results_mask = numpy.array([item in record.values() for record in array], dtype=numpy.bool_)
else:
raise SqlError("SEARCH can only be used with VARCHAR, LIST and STRUCT.")
raise SqlError("SEARCH can only be used with VARCHAR, BLOB, LIST and STRUCT.")

if compressed:
# fill the result set
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This is the predicate push-down strategy and also includes the predicate
rewriter.

"""
import re

from orso.tools import random_string
from orso.types import OrsoTypes

from opteryx.connectors.capabilities import PredicatePushable
from opteryx.exceptions import UnsupportedSyntaxError
from opteryx.functions import FUNCTIONS
from opteryx.managers.expression import NodeType
from opteryx.managers.expression import get_all_nodes_of_type
from opteryx.models import Node
Expand All @@ -40,8 +49,16 @@ def _add_condition(existing_condition, new_condition):

def _rewrite_predicate(predicate):
"""
Rewrite individual predicates to forms able to push to more places
Rewrite individual predicates into forms that can be pushed to more places or
that are simply faster to evaluate.
"""
# remove adjacent wildcards
if (
predicate.value in {"Like", "ILike", "NotLike", "NotILike"}
and "%%" in predicate.right.value
):
predicate.right.value = re.sub(r"%+", "%", predicate.right.value)

if predicate.value in LIKE_REWRITES:
# LIKE conditions with no wildcards => Eq
if (
Expand All @@ -51,6 +68,50 @@ def _rewrite_predicate(predicate):
):
predicate.value = LIKE_REWRITES[predicate.value]
return predicate
if predicate.value == "Like" and predicate.right.value:
# Rewrite LIKEs as STARTS_WITH
if (
predicate.right.node_type == NodeType.LITERAL
and predicate.right.value[-1] == "%"
and predicate.right.value.count("%") == 1
and "_" not in predicate.right.value
):
predicate.right.value = predicate.right.value[:-1]
predicate.node_type = NodeType.FUNCTION
predicate.value = "STARTS_WITH"
predicate.function = FUNCTIONS["STARTS_WITH"]
predicate.parameters = [predicate.left, predicate.right]
return predicate
# Rewrite LIKEs as ENDS_WITH
if (
predicate.right.node_type == NodeType.LITERAL
and predicate.right.value[0] == "%"
and predicate.right.value.count("%") == 1
and "_" not in predicate.right.value
):
predicate.right.value = predicate.right.value[1:]
predicate.node_type = NodeType.FUNCTION
predicate.value = "ENDS_WITH"
predicate.function = FUNCTIONS["ENDS_WITH"]
predicate.parameters = [predicate.left, predicate.right]
return predicate
if (
predicate.right.node_type == NodeType.LITERAL
and predicate.right.value[0] == "%"
and predicate.right.value[-1] == "%"
and predicate.right.value.count("%") == 2
and "_" not in predicate.right.value
):
predicate.right.value = predicate.right.value[1:-1]
predicate.node_type = NodeType.FUNCTION
predicate.value = "SEARCH"
predicate.function = FUNCTIONS["SEARCH"]
predicate.parameters = [
predicate.left,
predicate.right,
Node(node_type=NodeType.LITERAL, type=OrsoTypes.BOOLEAN, value=False),
]
return predicate
if predicate.value in IN_REWRITES:
# IN conditions on single values => Eq
if predicate.right.node_type == NodeType.LITERAL and len(predicate.right.value) == 1:
Expand Down
Loading