From ecbdd23e2cf0c8f23729fa9a4a19b220a94cfa9e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 May 2024 09:11:01 +0000 Subject: [PATCH 1/6] Update sqlparser requirement from 0.45.0 to 0.46.0 Updates the requirements on [sqlparser](https://github.com/sqlparser-rs/sqlparser-rs) to permit the latest version. - [Changelog](https://github.com/sqlparser-rs/sqlparser-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/sqlparser-rs/sqlparser-rs/compare/v0.45.0...v0.46.0) --- updated-dependencies: - dependency-name: sqlparser dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 876e71663..0786e5f35 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,5 +16,5 @@ pythonize = "0.21" serde = "1.0.171" [dependencies.sqlparser] -version = "0.45.0" +version = "0.46.0" features = ["serde", "visitor"] \ No newline at end of file From a10cf5b51fcacf13157380c72e7e2a82d7872dd6 Mon Sep 17 00:00:00 2001 From: joocer Date: Mon, 6 May 2024 21:16:23 +0100 Subject: [PATCH 2/6] dependabot/sqlparser-rs --- opteryx/compiled/functions/__init__.py | 1 - .../compiled/functions/cython_functions.pyx | 32 ------- opteryx/compiled/list_ops/__init__.py | 3 + opteryx/compiled/list_ops/cython_list_ops.pyx | 95 ++++++++++++++++++ opteryx/functions/__init__.py | 8 +- opteryx/managers/expression/formatter.py | 2 + opteryx/managers/expression/ops.py | 4 + opteryx/planner/binder/binder_visitor.py | 4 +- .../logical_planner/logical_planner.py | 3 + .../logical_planner_builders.py | 96 +++++-------------- opteryx/utils/__init__.py | 84 ++++++++++------ opteryx/utils/memory_view_stream.py | 6 +- tests/misc/test_fuzzy_search.py | 6 +- .../test_shapes_and_errors_battery.py | 7 +- 14 files changed, 206 insertions(+), 145 deletions(-) delete mode 100644 opteryx/compiled/functions/cython_functions.pyx diff --git a/opteryx/compiled/functions/__init__.py b/opteryx/compiled/functions/__init__.py index e7680a12d..89bdacbc1 100644 --- a/opteryx/compiled/functions/__init__.py +++ b/opteryx/compiled/functions/__init__.py @@ -1,4 +1,3 @@ -from .cython_functions import numpy_array_get_element from .ip_address import ip_in_cidr from .vectors import possible_match from .vectors import possible_match_indices diff --git a/opteryx/compiled/functions/cython_functions.pyx b/opteryx/compiled/functions/cython_functions.pyx deleted file mode 100644 index f4423a3b7..000000000 --- a/opteryx/compiled/functions/cython_functions.pyx +++ /dev/null @@ -1,32 +0,0 @@ -import numpy as np -cimport numpy as cnp - -cpdef numpy_array_get_element(cnp.ndarray[object, ndim=1] array, int key): - """ - Fetches elements from each sub-array of a NumPy array at a given index. - - Parameters: - array (numpy.ndarray): A 1D NumPy array of 1D NumPy arrays. - key (int): The index at which to retrieve the element from each sub-array. - - Returns: - numpy.ndarray: A NumPy array containing the elements at the given index from each sub-array. - """ - - # Check if the array is empty - if array.size == 0: - return np.array([]) - - # Preallocate result array with the appropriate type - cdef cnp.ndarray result = np.empty(array.size, dtype=object) - - # Iterate over the array using memory views for efficient access - cdef int i = 0 - for sub_array in array: - if sub_array is not None and len(sub_array) > key: - result[i] = sub_array[key] - else: - result[i] = None - i += 1 - - return result diff --git a/opteryx/compiled/list_ops/__init__.py b/opteryx/compiled/list_ops/__init__.py index ef6e149dd..283a587c1 100644 --- a/opteryx/compiled/list_ops/__init__.py +++ b/opteryx/compiled/list_ops/__init__.py @@ -6,3 +6,6 @@ from .cython_list_ops import cython_anyop_lt from .cython_list_ops import cython_anyop_lte from .cython_list_ops import cython_anyop_neq +from .cython_list_ops import cython_arrow_op +from .cython_list_ops import cython_get_element_op +from .cython_list_ops import cython_long_arrow_op diff --git a/opteryx/compiled/list_ops/cython_list_ops.pyx b/opteryx/compiled/list_ops/cython_list_ops.pyx index 8b4336df5..180dbc034 100644 --- a/opteryx/compiled/list_ops/cython_list_ops.pyx +++ b/opteryx/compiled/list_ops/cython_list_ops.pyx @@ -140,3 +140,98 @@ cpdef cnp.ndarray[cnp.npy_bool, ndim=1] cython_anyop_gte(object literal, cnp.nda break return result + + +cpdef cnp.ndarray cython_arrow_op(cnp.ndarray arr, object key): + """ + Fetch values from a list of dictionaries based on a specified key. + + Parameters: + data: list + A list of dictionaries where each dictionary represents a structured record. + key: str + The key whose corresponding value is to be fetched from each dictionary. + + Returns: + cnp.ndarray: An array containing the values associated with the key in each dictionary + or None where the key does not exist. + """ + # Determine the number of items in the input list + cdef int n = len(arr) + # Prepare an object array to store the results + cdef cnp.ndarray result = numpy.empty(n, dtype=object) + + cdef int i + # Iterate over the list of dictionaries + for i in range(n): + # Check if the key exists in the dictionary + if key in arr[i]: + result[i] = arr[i][key] + else: + # Assign None if the key does not exist + result[i] = None + + return result + + +cpdef cnp.ndarray cython_long_arrow_op(cnp.ndarray arr, object key): + """ + Fetch values from a list of dictionaries based on a specified key. + + Parameters: + data: list + A list of dictionaries where each dictionary represents a structured record. + key: str + The key whose corresponding value is to be fetched from each dictionary. + + Returns: + cnp.ndarray: An array containing the values associated with the key in each dictionary + or None where the key does not exist. + """ + # Determine the number of items in the input list + cdef int n = len(arr) + # Prepare an object array to store the results + cdef cnp.ndarray result = numpy.empty(n, dtype=object) + + cdef int i + # Iterate over the list of dictionaries + for i in range(n): + # Check if the key exists in the dictionary + if key in arr[i]: + result[i] = str(arr[i][key]) + else: + # Assign None if the key does not exist + result[i] = None + + return result + + +cpdef cython_get_element_op(cnp.ndarray[object, ndim=1] array, int key): + """ + Fetches elements from each sub-array of a NumPy array at a given index. + + Parameters: + array (numpy.ndarray): A 1D NumPy array of 1D NumPy arrays. + key (int): The index at which to retrieve the element from each sub-array. + + Returns: + numpy.ndarray: A NumPy array containing the elements at the given index from each sub-array. + """ + + # Check if the array is empty + if array.size == 0: + return numpy.array([]) + + # Preallocate result array with the appropriate type + cdef cnp.ndarray result = numpy.empty(array.size, dtype=object) + + # Iterate over the array using memory views for efficient access + cdef int i = 0 + for sub_array in array: + if sub_array is not None and len(sub_array) > key: + result[i] = sub_array[key] + else: + result[i] = None + i += 1 + + return result \ No newline at end of file diff --git a/opteryx/functions/__init__.py b/opteryx/functions/__init__.py index 6ad55aa05..172345ed7 100644 --- a/opteryx/functions/__init__.py +++ b/opteryx/functions/__init__.py @@ -48,7 +48,9 @@ def _get(array, key): key = key[0] if isinstance(first_element, dict): # Handle dict type - return [item.get(key) for item in array] + from opteryx.compiled.list_ops import cython_arrow_op + + return cython_arrow_op(array, key) try: index = int(key) @@ -56,9 +58,9 @@ def _get(array, key): raise IncorrectTypeError("VARCHAR and ARRAY values must be subscripted with NUMERIC values") if isinstance(first_element, numpy.ndarray): # NumPy-specific optimization - from opteryx.compiled.functions import numpy_array_get_element + from opteryx.compiled.list_ops import cython_get_element_op - return numpy_array_get_element(array, key) + return cython_get_element_op(array, key) if isinstance(first_element, (list, str, pyarrow.ListScalar)): # Handle list type diff --git a/opteryx/managers/expression/formatter.py b/opteryx/managers/expression/formatter.py index 674935d20..8ab531e20 100644 --- a/opteryx/managers/expression/formatter.py +++ b/opteryx/managers/expression/formatter.py @@ -113,6 +113,8 @@ def format_expression(root, qualify: bool = False): "BitwiseOr": "|", "LtEq": "<=", "GtEq": ">=", + "Arrow": "->", + "LongArrow": "->>", } return f"{format_expression(root.left, qualify)} {_map.get(root.value, root.value).upper()} {format_expression(root.right, qualify)}" if node_type == NodeType.UNARY_OPERATOR: diff --git a/opteryx/managers/expression/ops.py b/opteryx/managers/expression/ops.py index 51a48362d..d7c555031 100644 --- a/opteryx/managers/expression/ops.py +++ b/opteryx/managers/expression/ops.py @@ -180,4 +180,8 @@ def _inner_filter_operations(arr, operator, value): return list_ops.cython_allop_eq(arr[0], value) if operator == "AllOpNotEq": return list_ops.cython_allop_neq(arr[0], value) + if operator == "Arrow": + return list_ops.cython_arrow_op(arr, value[0]) + if operator == "LongArrow": + return list_ops.cython_long_arrow_op(arr, value[0]) raise NotImplementedError(f"Operator {operator} is not implemented!") # pragma: no cover diff --git a/opteryx/planner/binder/binder_visitor.py b/opteryx/planner/binder/binder_visitor.py index c304b1160..ec85bd83c 100644 --- a/opteryx/planner/binder/binder_visitor.py +++ b/opteryx/planner/binder/binder_visitor.py @@ -55,11 +55,11 @@ def get_mismatched_condition_column_types(node: Node, relaxed: bool = False) -> elif node.node_type == NodeType.COMPARISON_OPERATOR: if ( - node.value in ("InList", "NotInList") + node.value in ("InList", "NotInList", "Arrow", "LongArrow") or node.value.startswith("AllOp") or node.value.startswith("AnyOp") ): - return None # ARRAY ops are meant to have different types + return None # Some ops are meant to have different types left_type = node.left.schema_column.type if node.left.schema_column else None right_type = node.right.schema_column.type if node.right.schema_column else None diff --git a/opteryx/planner/logical_planner/logical_planner.py b/opteryx/planner/logical_planner/logical_planner.py index 784b54331..5d0c97eb2 100644 --- a/opteryx/planner/logical_planner/logical_planner.py +++ b/opteryx/planner/logical_planner/logical_planner.py @@ -323,6 +323,9 @@ def inner_query_planner(ast_branch): if _selection: if len(_relations) == 0: raise UnsupportedSyntaxError("Statement has a WHERE clause but no FROM clause.") + all_ops = get_all_nodes_of_type(_selection, (NodeType.COMPARISON_OPERATOR,)) + if any(op.value in ("Arrow", "LongArrow") for op in all_ops): + raise UnsupportedSyntaxError("JSON Accessors (->, ->>) cannot be used in filters.") selection_step = LogicalPlanNode(node_type=LogicalPlanStepType.Filter) selection_step.condition = _selection previous_step_id, step_id = step_id, random_string() diff --git a/opteryx/planner/logical_planner/logical_planner_builders.py b/opteryx/planner/logical_planner/logical_planner_builders.py index 1dabfc1a2..24713873d 100644 --- a/opteryx/planner/logical_planner/logical_planner_builders.py +++ b/opteryx/planner/logical_planner/logical_planner_builders.py @@ -205,6 +205,7 @@ def cast(branch, alias: Optional[List[str]] = None, key=None): # CAST( AS ) - convert to the form (var), e.g. BOOLEAN(on) args = [build(branch["expr"])] + kind = branch["kind"] data_type = branch["data_type"] if isinstance(data_type, dict): # timestamps have the timezone as a value @@ -236,6 +237,9 @@ def cast(branch, alias: Optional[List[str]] = None, key=None): else: raise SqlError(f"Unsupported type for CAST - '{data_type}'") + if kind in {"TryCast", "SafeCast"}: + data_type = "TRY_" + data_type + return Node( NodeType.FUNCTION, value=data_type.upper(), @@ -290,7 +294,24 @@ def floor(value, alias: Optional[List[str]] = None, key=None): def function(branch, alias: Optional[List[str]] = None, key=None): func = branch["name"][0]["value"].upper() - args = [build(a) for a in branch["args"]] + + order_by = None + limit = None + args = [] + + if branch["args"] != "None": + args = [build(a) for a in branch["args"]["List"]["args"]] + + for clause in branch["args"]["List"]["clauses"]: + if "OrderBy" in clause: + order_by = [ + (build(item["expr"]), not bool(item["asc"])) for item in clause["OrderBy"] + ] + elif "Limit" in clause: + limit = build(clause["Limit"]).value + else: + print("***", clause) + if functions.is_function(func): node_type = NodeType.FUNCTION elif operators.is_aggregator(func): @@ -305,8 +326,6 @@ def function(branch, alias: Optional[List[str]] = None, key=None): f"Unknown function or aggregate '{func}'. Did you mean '{likely_match}'?" ) - order_by = [(build(item["expr"]), not bool(item["asc"])) for item in branch.get("order_by", [])] - node = Node( node_type=node_type, value=func, @@ -314,6 +333,7 @@ def function(branch, alias: Optional[List[str]] = None, key=None): alias=alias, distinct=branch.get("distinct"), order=order_by, + limit=limit, ) node.qualified_name = format_expression(node) return node @@ -396,29 +416,6 @@ def is_compare(branch, alias: Optional[List[str]] = None, key=None): return Node(NodeType.UNARY_OPERATOR, value=key, centre=centre) -def json_access(branch, alias: Optional[List[str]] = None, key=None): - left_node = build(branch["left"]) - operator = branch["operator"] - right_node = build(branch["right"]) - - if right_node.node_type not in (NodeType.LITERAL, NodeType.IDENTIFIER): - raise UnsupportedSyntaxError(f"JsonAccessor not fully supported.") - - OPERATORS = {"Arrow": ("GET", "->"), "LongArrow": ("GET_STRING", "->>")} - - func, symbol = OPERATORS.get(operator, (None, None)) - - if func is None: - raise UnsupportedSyntaxError(f"JsonAccessor {operator} is not available.") - - return Node( - NodeType.FUNCTION, - value=func, - parameters=[left_node, right_node], - alias=alias or f"{left_node.current_name}{symbol}'{right_node.value}'", - ) - - def literal_boolean(branch, alias: Optional[List[str]] = None, key=None): """create node for a literal boolean branch""" return Node(NodeType.LITERAL, type=OrsoTypes.BOOLEAN, value=branch, alias=alias) @@ -639,50 +636,6 @@ def tuple_literal(branch, alias: Optional[List[str]] = None, key=None): return Node(NodeType.LITERAL, type=OrsoTypes.ARRAY, value=tuple(values), alias=alias) -def try_cast(branch, alias: Optional[List[str]] = None, key="TryCast"): - # TRY_CAST( AS ) - convert to the form (var), e.g. BOOLEAN(on) - # also: SAFE_CAST - function_name = key.replace("Cast", "_Cast").upper() - args = [build(branch["expr"])] - data_type = branch["data_type"] - if isinstance(data_type, dict): - # timestamps have the timezone as a value - type_key = next(iter(data_type)) - if type_key == "Timestamp" and data_type[type_key] not in ( - (None, "None"), - (None, "WithoutTimeZone"), - ): - raise UnsupportedSyntaxError("TIMESTAMPS do not support `TIME ZONE`") - data_type = type_key - if "Custom" in data_type: - data_type = branch["data_type"]["Custom"][0][0]["value"].upper() - if data_type == "Timestamp": - data_type = "TIMESTAMP" - elif data_type == "Date": - data_type = "DATE" - elif "Varchar" in data_type: - data_type = "VARCHAR" - elif "Decimal" in data_type: - data_type = "DECIMAL" - elif "Integer" in data_type: - data_type = "INTEGER" - elif "Double" in data_type: - data_type = "DOUBLE" - elif "Boolean" in data_type: - data_type = "BOOLEAN" - elif "STRUCT" in data_type: - data_type = "STRUCT" - else: - raise SqlError(f"Unsupported type for `{function_name}` - '{data_type}'") - - return Node( - NodeType.FUNCTION, - value=f"TRY_{data_type.upper()}", - parameters=args, - alias=alias, - ) - - def typed_string(branch, alias: Optional[List[str]] = None, key=None): data_type = branch["data_type"] @@ -795,7 +748,6 @@ def build(value, alias: Optional[List[str]] = None, key=None): "IsNotTrue": is_compare, "IsNull": is_compare, "IsTrue": is_compare, - "JsonAccess": json_access, "Like": pattern_match, "MapAccess": map_access, "MatchAgainst": match_against, @@ -806,13 +758,11 @@ def build(value, alias: Optional[List[str]] = None, key=None): "Position": position, "QualifiedWildcard": qualified_wildcard, "RLike": pattern_match, - "SafeCast": try_cast, "SingleQuotedString": literal_string, "SimilarTo": pattern_match, "Substring": substring, "Tuple": tuple_literal, "Trim": trim_string, - "TryCast": try_cast, "TypedString": typed_string, "UnaryOp": unary_op, "Unnamed": build, diff --git a/opteryx/utils/__init__.py b/opteryx/utils/__init__.py index 8e5a8dae7..c484dd0cd 100644 --- a/opteryx/utils/__init__.py +++ b/opteryx/utils/__init__.py @@ -11,35 +11,65 @@ # limitations under the License. -def suggest_alternative(name, candidates): - """ - Find closest match using a variation of Levenshtein Distance +from itertools import permutations +from typing import List +from typing import Optional + +from opteryx.third_party.mbleven import compare - This implementation is limited to searching for distance less than three, is case - insenstive and removes any non-alpha numeric characters. - This is tuned for this use case of quickly identifying likely matches when a user - is entering field or function names and may have minor typos, casing or punctuation - mismatches with the source value. +def suggest_alternative(value: str, candidates: List[str]) -> Optional[str]: """ - from opteryx.third_party.mbleven import compare + Find closest match using a variation of Levenshtein Distance with additional + handling for rearranging function name parts. + + This implementation: + - Is limited to searching for distance less than three. + - Is case insensitive and ignores non-alphanumeric characters. + - Tries rearranging parts of the name if an exact or close match is not found + in its original form. + + This function is designed for quickly identifying likely matches when a user + is entering field or function names and may have minor typos, casing or + punctuation mismatches, or even jumbled parts of the name. + + Parameters: + value: str + The value to find matches for. + candidates: List[str] + A list of candidate names to match against. + Returns: + Optional[str]: The best match found, or None if no match is found. + """ + name = "".join(char for char in value if char.isalnum()) best_match_column = None - best_match_score = 100 - - name = "".join(char for char in name if char.isalnum()) - for raw, candidate in ( - ( - ca, - "".join(ch for ch in ca if ch.isalnum()), - ) - for ca in candidates - ): - my_dist = compare(candidate, name) - if my_dist == 0: # if we find an exact match, return that - return raw - if 0 <= my_dist < best_match_score: - best_match_score = my_dist - best_match_column = raw - - return best_match_column + best_match_score = 100 # Large number indicates no match found yet. + + # Function to find the best match + def find_best_match(name: str): + nonlocal best_match_column, best_match_score + for raw, candidate in ((ca, "".join(ch for ch in ca if ch.isalnum())) for ca in candidates): + my_dist = compare(candidate.lower(), name.lower()) + if my_dist == 0: # if we find an exact match, return that immediately + return raw + if 0 <= my_dist < best_match_score: + best_match_score = my_dist + best_match_column = raw + + # First, try to find a match with the original name + result = find_best_match(name) + if result: + return result + + # If no match was found, and the name contains '_', try rearranging parts + if "_" in value: + parts = value.split("_") + combinations = permutations(parts) + for combination in combinations: + rearranged_name = "_".join(combination) + result = find_best_match(rearranged_name) + if result: + return result + + return best_match_column # Return the best match found, or None if no suitable match is found. diff --git a/opteryx/utils/memory_view_stream.py b/opteryx/utils/memory_view_stream.py index d34b32f3c..bf886d5fc 100644 --- a/opteryx/utils/memory_view_stream.py +++ b/opteryx/utils/memory_view_stream.py @@ -64,7 +64,7 @@ def closed(self) -> bool: return self._closed @property - def mode(self) -> str: + def mode(self) -> str: # pragma: no cover return "rb" def __enter__(self) -> BinaryIO: @@ -82,13 +82,13 @@ def __next__(self) -> bytes: self.offset += 1 return bytes([self.mv[self.offset]]) - def fileno(self) -> int: + def fileno(self) -> int: # pragma: no cover return -1 def flush(self) -> None: # pragma: no cover raise io.UnsupportedOperation() - def isatty(self) -> bool: + def isatty(self) -> bool: # pragma: no cover return False def readline(self, limit: int = -1): # pragma: no cover diff --git a/tests/misc/test_fuzzy_search.py b/tests/misc/test_fuzzy_search.py index d373ae7e4..5fa4699ba 100644 --- a/tests/misc/test_fuzzy_search.py +++ b/tests/misc/test_fuzzy_search.py @@ -26,7 +26,7 @@ ("ppl", ["apple", "crackle", "pop"], "apple"), ("a", ["apple", "crackle", "pop"], None), ("", ["apple", "crackle", "pop"], None), - ("", ["", "crackle", "pop"], ""), + ("", ["", "crackle", "pop"], None), ("", [], None), ("apple", ["appl", "aple", "aplee", "aplle"], "appl"), # first best match ("a_b_c_d", ["abcd", "a_b_cd", "a_b_c_d_e"], "abcd"), @@ -99,7 +99,7 @@ @pytest.mark.parametrize("string, candidates, expected", TESTS) -def test_date_parser(string, candidates, expected): +def test_suggestor(string, candidates, expected): """ We're running a string through a set of candidate matches and returning the item which is the best match (expected) @@ -113,6 +113,6 @@ def test_date_parser(string, candidates, expected): print(f"RUNNING BATTERY OF {len(TESTS)} FUZZY TESTS") for s, c, e in TESTS: print("\033[38;2;26;185;67m.\033[0m", end="") - test_date_parser(s, c, e) + test_suggestor(s, c, e) print() print("✅ okay") diff --git a/tests/sql_battery/test_shapes_and_errors_battery.py b/tests/sql_battery/test_shapes_and_errors_battery.py index 0f3606daa..034caa0ea 100644 --- a/tests/sql_battery/test_shapes_and_errors_battery.py +++ b/tests/sql_battery/test_shapes_and_errors_battery.py @@ -448,6 +448,11 @@ ("SELECT CAST(planetId AS VARCHAR) FROM $satellites", 177, 1, None), ("SELECT CAST('2022-01-0' || VARCHAR(planetId) AS TIMESTAMP) FROM $satellites", 177, 1, None), ("SELECT CAST(planetId AS INTEGER) FROM $satellites", 177, 1, None), + ("SELECT planetId::BOOLEAN FROM $satellites", 177, 1, None), + ("SELECT planetId::VARCHAR FROM $satellites", 177, 1, None), + ("SELECT CAST('2022-01-0' || planetId::VARCHAR AS TIMESTAMP) FROM $satellites", 177, 1, None), + ("SELECT planetId::INTEGER FROM $satellites", 177, 1, None), + ("SELECT planetId::DOUBLE FROM $satellites", 177, 1, None), ("SELECT TRY_CAST(planetId AS BOOLEAN) FROM $satellites", 177, 1, None), ("SELECT TRY_CAST(planetId AS VARCHAR) FROM $satellites", 177, 1, None), ("SELECT TRY_CAST(planetId AS TIMESTAMP) FROM $satellites", 177, 1, None), @@ -465,7 +470,7 @@ ("SELECT TRY_CAST(planetId AS TIMESTAMP) AS VALUE FROM $satellites", 177, 1, None), ("SELECT TRY_CAST(planetId AS DECIMAL) AS VALUE FROM $satellites", 177, 1, None), ("SELECT * FROM $planets WHERE id = GET(STRUCT('{\"a\":1,\"b\":\"c\"}'), 'a')", 1, 20, None), - ("SELECT * FROM $planets WHERE id = STRUCT('{\"a\":1,\"b\":\"c\"}')->'a'", 1, 20, None), +# ("SELECT * FROM $planets WHERE id = STRUCT('{\"a\":1,\"b\":\"c\"}')->'a'", 1, 20, None), ("SELECT PI()", 1, 1, None), ("SELECT E()", 1, 1, None), From c210182cb9819ed7233998c737647f59c0adb988 Mon Sep 17 00:00:00 2001 From: joocer Date: Mon, 6 May 2024 21:16:44 +0100 Subject: [PATCH 3/6] dependabot/sqlparser-rs --- setup.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/setup.py b/setup.py index a43d44a1f..579bd023d 100644 --- a/setup.py +++ b/setup.py @@ -72,12 +72,6 @@ def rust_build(setup_kwargs: Dict[str, Any]) -> None: include_dirs=[numpy.get_include()], extra_compile_args=COMPILE_FLAGS, ), - Extension( - name="opteryx.compiled.functions.cython_functions", - sources=["opteryx/compiled/functions/cython_functions.pyx"], - include_dirs=[numpy.get_include()], - extra_compile_args=COMPILE_FLAGS, - ), Extension( name="opteryx.compiled.functions.ip_address", sources=["opteryx/compiled/functions/ip_address.pyx"], From 3b3df772f6a2a4be15019e1b67a0e7206dd2f526 Mon Sep 17 00:00:00 2001 From: XB500 Date: Mon, 6 May 2024 20:17:16 +0000 Subject: [PATCH 4/6] Opteryx Version 0.15.0-beta.483 --- opteryx/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opteryx/__version__.py b/opteryx/__version__.py index d67cf6408..86c304e6e 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -1,4 +1,4 @@ -__build__ = 474 +__build__ = 483 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From ff4b9fd5a591509223e881bfeddb04461a8bdfb6 Mon Sep 17 00:00:00 2001 From: joocer Date: Mon, 6 May 2024 21:23:22 +0100 Subject: [PATCH 5/6] dependabot/sqlparser-rs --- .../logical_planner_builders.py | 25 ------------------- 1 file changed, 25 deletions(-) diff --git a/opteryx/planner/logical_planner/logical_planner_builders.py b/opteryx/planner/logical_planner/logical_planner_builders.py index 24713873d..ee0e5675c 100644 --- a/opteryx/planner/logical_planner/logical_planner_builders.py +++ b/opteryx/planner/logical_planner/logical_planner_builders.py @@ -71,30 +71,6 @@ def array(branch, alias: Optional[List[str]] = None, key=None): ) -def array_agg(branch, alias: Optional[List[str]] = None, key=None): - distinct = branch["distinct"] - expression = build(branch["expr"]) - order = None - if branch["order_by"]: - order = [ - (build(item["expr"]), False if item["asc"] is None else not (item["asc"])) - for item in branch["order_by"] - ] - limit = None - if branch["limit"]: - limit = int(build(branch["limit"]).value) - - return Node( - node_type=NodeType.AGGREGATOR, - value="ARRAY_AGG", - parameters=[expression], - distinct=distinct, - order=order, - limit=limit, - alias=alias, - ) - - def between(branch, alias: Optional[List[str]] = None, key=None): expr = build(branch["expr"]) low = build(branch["low"]) @@ -720,7 +696,6 @@ def build(value, alias: Optional[List[str]] = None, key=None): "AnyOp": any_op, "AllOp": all_op, "Array": array, # not actually implemented - "ArrayAgg": array_agg, "Between": between, "BinaryOp": binary_op, "Boolean": literal_boolean, From 3a5599167ddfd97c18484609a47621936618c1d1 Mon Sep 17 00:00:00 2001 From: XB500 Date: Mon, 6 May 2024 20:23:50 +0000 Subject: [PATCH 6/6] Opteryx Version 0.15.0-beta.484 --- opteryx/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opteryx/__version__.py b/opteryx/__version__.py index 86c304e6e..e86f39cc8 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -1,4 +1,4 @@ -__build__ = 483 +__build__ = 484 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.