mabel-dev · joocer · May 29, 2024 · May 29, 2024 · May 29, 2024
diff --git a/opteryx/__version__.py b/opteryx/__version__.py
@@ -1,4 +1,4 @@
-__build__ = 533
+__build__ = 534
 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

diff --git a/opteryx/functions/other_functions.py b/opteryx/functions/other_functions.py
@@ -55,7 +55,7 @@ def list_contains_all(array, items):
     return set(array).issuperset(items)
 
 
-def search(array, item):
+def search(array, item, ignore_case=[True]):
     """
     `search` provides a way to look for values across different field types, rather
     than doing a LIKE on a string, IN on a list, `search` adapts to the field type.
@@ -82,17 +82,17 @@ def search(array, item):
     else:
         return numpy.array([False], dtype=numpy.bool_)
 
-    if array_type == str:
+    if array_type in (str, bytes):
         # return True if the value is in the string
-        results_mask = compute.match_substring(array, pattern=item, ignore_case=True)
+        results_mask = compute.match_substring(array, pattern=item, ignore_case=ignore_case[0])
     elif array_type == numpy.ndarray:
         # converting to a set is faster for a handful of items which is what we're
         # almost definitely working with here - note compute.index is about 50x slower
         results_mask = numpy.array([item in set(record) for record in array], dtype=numpy.bool_)
     elif array_type == dict:
         results_mask = numpy.array([item in record.values() for record in array], dtype=numpy.bool_)
     else:
-        raise SqlError("SEARCH can only be used with VARCHAR, LIST and STRUCT.")
+        raise SqlError("SEARCH can only be used with VARCHAR, BLOB, LIST and STRUCT.")
 
     if compressed:
         # fill the result set

diff --git a/opteryx/planner/cost_based_optimizer/strategies/predicate_pushdown.py b/opteryx/planner/cost_based_optimizer/strategies/predicate_pushdown.py
@@ -10,10 +10,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""
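+This is the predicate push-down strategy; it also includes the predicate
+rewriter, which rewrites individual predicates into forms that can be pushed
+to more places or that are faster to evaluate.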
+"""
+import re
+
 from orso.tools import random_string
+from orso.types import OrsoTypes
 
 from opteryx.connectors.capabilities import PredicatePushable
 from opteryx.exceptions import UnsupportedSyntaxError
+from opteryx.functions import FUNCTIONS
 from opteryx.managers.expression import NodeType
 from opteryx.managers.expression import get_all_nodes_of_type
 from opteryx.models import Node
@@ -40,8 +49,16 @@ def _add_condition(existing_condition, new_condition):
 
 def _rewrite_predicate(predicate):
     """
-    Rewrite individual predicates to forms able to push to more places
+    Rewrite individual predicates into forms that can be pushed to more places
+    or that are simply faster to evaluate.
     """
+    # collapse runs of adjacent '%' wildcards into a single '%'
+    if (
+        predicate.value in {"Like", "ILike", "NotLike", "NotILike"}
+        and "%%" in predicate.right.value
+    ):
+        predicate.right.value = re.sub(r"%+", "%", predicate.right.value)
+
     if predicate.value in LIKE_REWRITES:
         # LIKE conditions with no wildcards => Eq
         if (
@@ -51,6 +68,50 @@ def _rewrite_predicate(predicate):
         ):
             predicate.value = LIKE_REWRITES[predicate.value]
             return predicate
+    if predicate.value == "Like" and predicate.right.value:
+        # LIKE 'abc%' (one trailing wildcard, no '_') is rewritten as STARTS_WITH
+        if (
+            predicate.right.node_type == NodeType.LITERAL
+            and predicate.right.value[-1] == "%"
+            and predicate.right.value.count("%") == 1
+            and "_" not in predicate.right.value
+        ):
+            predicate.right.value = predicate.right.value[:-1]
+            predicate.node_type = NodeType.FUNCTION
+            predicate.value = "STARTS_WITH"
+            predicate.function = FUNCTIONS["STARTS_WITH"]
+            predicate.parameters = [predicate.left, predicate.right]
+            return predicate
+        # LIKE '%abc' (one leading wildcard, no '_') is rewritten as ENDS_WITH
+        if (
+            predicate.right.node_type == NodeType.LITERAL
+            and predicate.right.value[0] == "%"
+            and predicate.right.value.count("%") == 1
+            and "_" not in predicate.right.value
+        ):
+            predicate.right.value = predicate.right.value[1:]
+            predicate.node_type = NodeType.FUNCTION
+            predicate.value = "ENDS_WITH"
+            predicate.function = FUNCTIONS["ENDS_WITH"]
+            predicate.parameters = [predicate.left, predicate.right]
+            return predicate
+        if (
+            predicate.right.node_type == NodeType.LITERAL
+            and predicate.right.value[0] == "%"
+            and predicate.right.value[-1] == "%"
+            and predicate.right.value.count("%") == 2
+            and "_" not in predicate.right.value
+        ):
+            predicate.right.value = predicate.right.value[1:-1]
+            predicate.node_type = NodeType.FUNCTION
+            predicate.value = "SEARCH"
+            predicate.function = FUNCTIONS["SEARCH"]
+            predicate.parameters = [
+                predicate.left,
+                predicate.right,
+                Node(node_type=NodeType.LITERAL, type=OrsoTypes.BOOLEAN, value=False),
+            ]
+            return predicate
     if predicate.value in IN_REWRITES:
         # IN conditions on single values => Eq
         if predicate.right.node_type == NodeType.LITERAL and len(predicate.right.value) == 1: