diff --git a/opteryx/__version__.py b/opteryx/__version__.py index 446fcc6fb..652605fd9 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -1,4 +1,4 @@ -__build__ = 428 +__build__ = 430 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/opteryx/components/binder/binder.py b/opteryx/components/binder/binder.py index 755226c94..b022a3a5e 100644 --- a/opteryx/components/binder/binder.py +++ b/opteryx/components/binder/binder.py @@ -339,7 +339,13 @@ def inner_binder(node: Node, context: Any) -> Tuple[Node, Any]: node.query_column = node.alias or column_name identifiers = get_all_nodes_of_type(node, (NodeType.IDENTIFIER,)) - node.relations = {col.source for col in identifiers if col.source is not None} + sources = [] + for col in identifiers: + if col.source is not None: + sources.append(col.source) + if col.schema_column is not None: + sources.extend(col.schema_column.origin) + node.relations = set(sources) context.schemas = schemas return node, context diff --git a/opteryx/components/binder/binder_visitor.py b/opteryx/components/binder/binder_visitor.py index f856de206..d939fe73f 100644 --- a/opteryx/components/binder/binder_visitor.py +++ b/opteryx/components/binder/binder_visitor.py @@ -837,9 +837,11 @@ def visit_subquery(self, node: Node, context: BindingContext) -> Tuple[Node, Bin ), None, ) + if not schema_column.origin: + schema_column.origin = [] source_relations.extend(schema_column.origin or []) projection_column.source = node.alias - schema_column.origin = [node.alias] + schema_column.origin += [node.alias] schema_column.name = ( projection_column.current_name if projection_column else schema_column.name @@ -858,7 +860,7 @@ def visit_subquery(self, node: Node, context: BindingContext) -> Tuple[Node, Bin schema = RelationSchema(name=node.alias, columns=columns) context.schemas = {"$derived": derived.schema(), node.alias: schema} - context.relations 
= {node.alias} + context.relations.add(node.alias) node.schema = schema node.source_relations = set(source_relations) return node, context diff --git a/opteryx/components/cost_based_optimizer/strategies/predicate_pushdown.py b/opteryx/components/cost_based_optimizer/strategies/predicate_pushdown.py index 80cdfe657..a1454c95f 100644 --- a/opteryx/components/cost_based_optimizer/strategies/predicate_pushdown.py +++ b/opteryx/components/cost_based_optimizer/strategies/predicate_pushdown.py @@ -80,11 +80,7 @@ def visit(self, node: LogicalPlanNode, context: OptimizerContext) -> OptimizerCo if not context.optimized_plan: context.optimized_plan = context.pre_optimized_tree.copy() # type: ignore - if node.node_type in ( - LogicalPlanStepType.Scan, - LogicalPlanStepType.FunctionDataset, - LogicalPlanStepType.Subquery, - ): + if node.node_type in (LogicalPlanStepType.Scan, LogicalPlanStepType.FunctionDataset): # Handle predicates specific to node types context = self._handle_predicates(node, context) @@ -236,7 +232,7 @@ def _handle_predicates( ) -> OptimizerContext: remaining_predicates = [] for predicate in context.collected_predicates: - if len(predicate.relations) == 1 and predicate.relations.intersection( + if len(predicate.relations) >= 1 and predicate.relations.intersection( (node.relation, node.alias) ): if node.connector: diff --git a/opteryx/components/cost_based_optimizer/strategies/split_conjunctive_predicates.py b/opteryx/components/cost_based_optimizer/strategies/split_conjunctive_predicates.py index f1325b5d5..a6fa3298d 100644 --- a/opteryx/components/cost_based_optimizer/strategies/split_conjunctive_predicates.py +++ b/opteryx/components/cost_based_optimizer/strategies/split_conjunctive_predicates.py @@ -61,7 +61,14 @@ def visit(self, node: LogicalPlanNode, context: OptimizerContext) -> OptimizerCo new_node.columns = get_all_nodes_of_type( predicate, select_nodes=(NodeType.IDENTIFIER,) ) - new_node.relations = {c.source for c in new_node.columns} + + sources = 
[] + for col in new_node.columns: + if col.source is not None: + sources.append(col.source) + if col.schema_column is not None: + sources.extend(col.schema_column.origin) + new_node.relations = set(sources) new_nodes.append(new_node) else: new_nodes = [node] diff --git a/opteryx/models/connection_context.py b/opteryx/models/connection_context.py index df1696c9e..046b6b983 100644 --- a/opteryx/models/connection_context.py +++ b/opteryx/models/connection_context.py @@ -13,6 +13,7 @@ import datetime from dataclasses import dataclass from dataclasses import field +from typing import Iterable from typing import List from typing import Tuple @@ -51,7 +52,7 @@ class ConnectionContext: connected_at: datetime.datetime = field(default_factory=datetime.datetime.utcnow, init=False) user: str = None schema: str = None - memberships: str = None + memberships: Iterable[str] = None variables: SystemVariablesContainer = field(init=False) history: List[HistoryItem] = field(default_factory=list, init=False) diff --git a/requirements.txt b/requirements.txt index 888a29592..7e2fe3355 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ cython -numpy +numpy==1.* orjson -orso>=0.0.147 +orso>=0.0.151 pyarrow>=12.0.1 typer==0.11.* diff --git a/tests/plan_optimization/test_predicate_pushdown_sqlite.py b/tests/plan_optimization/test_predicate_pushdown_sqlite.py index bd1018c6a..d7e5ecf76 100644 --- a/tests/plan_optimization/test_predicate_pushdown_sqlite.py +++ b/tests/plan_optimization/test_predicate_pushdown_sqlite.py @@ -4,11 +4,13 @@ import os import sys +import time sys.path.insert(1, os.path.join(sys.path[0], "../..")) import opteryx from opteryx.connectors import SqlConnector +from opteryx.utils.formatter import format_sql opteryx.register_store( "sqlite", @@ -17,98 +19,91 @@ connection="sqlite:///testdata/sqlite/database.db", ) - -def test_predicate_pushdowns_sqlite_eq(): - """ - This is the same test as the collection pushdown - but on a different dataset - """ - - 
conn = opteryx.connect() - - cur = conn.cursor() - cur.execute("SELECT * FROM sqlite.planets WHERE name = 'Mercury';") - # when pushdown is enabled, we only read the matching rows from the source - assert cur.rowcount == 1, cur.rowcount - assert cur.stats.get("rows_read", 0) == 1, cur.stats - - cur = conn.cursor() - cur.execute("SELECT * FROM sqlite.planets WHERE name = 'Mercury' AND gravity = 3.7;") - # test with a two part filter - assert cur.rowcount == 1, cur.rowcount - assert cur.stats.get("rows_read", 0) == 1, cur.stats - - cur = conn.cursor() - cur.execute( - "SELECT * FROM sqlite.planets WHERE name = 'Mercury' AND gravity = 3.7 AND escapeVelocity = 5.0;" - ) - # test with A three part filter - assert cur.rowcount == 0, cur.rowcount - assert cur.stats.get("rows_read", 0) == 0, cur.stats - - cur = conn.cursor() - cur.execute( - "SELECT * FROM sqlite.planets WHERE gravity = 3.7 AND name IN ('Mercury', 'Venus');" - ) - # we don't push all predicates down, - assert cur.rowcount == 1, cur.rowcount - assert cur.stats.get("rows_read", 0) == 2, cur.stats - - cur = conn.cursor() - cur.execute("SELECT * FROM sqlite.planets WHERE surfacePressure IS NULL;") - # We push unary ops to SQL - assert cur.rowcount == 4, cur.rowcount - assert cur.stats.get("rows_read", 0) == 4, cur.stats - - cur = conn.cursor() - cur.execute( - "SELECT * FROM sqlite.planets WHERE orbitalInclination IS FALSE AND name IN ('Earth', 'Mars');" - ) - # We push unary ops to SQL - assert cur.rowcount == 1, cur.rowcount - assert cur.stats.get("rows_read", 0) == 1, cur.stats - - conn.close() - - -def test_predicate_pushdown_sqlite_other(): - res = opteryx.query("SELECT * FROM sqlite.planets WHERE gravity <= 3.7") - assert res.rowcount == 3, res.rowcount - assert res.stats.get("rows_read", 0) == 3, res.stats - - res = opteryx.query("SELECT * FROM sqlite.planets WHERE name != 'Earth'") - assert res.rowcount == 8, res.rowcount - assert res.stats.get("rows_read", 0) == 8, res.stats - - res = 
opteryx.query("SELECT * FROM sqlite.planets WHERE name != 'E\"arth'") - assert res.rowcount == 9, res.rowcount - assert res.stats.get("rows_read", 0) == 9, res.stats - - res = opteryx.query("SELECT * FROM sqlite.planets WHERE gravity != 3.7") - assert res.rowcount == 7, res.rowcount - assert res.stats.get("rows_read", 0) == 7, res.stats - - res = opteryx.query("SELECT * FROM sqlite.planets WHERE gravity < 3.7") - assert res.rowcount == 1, res.rowcount - assert res.stats.get("rows_read", 0) == 1, res.stats - - res = opteryx.query("SELECT * FROM sqlite.planets WHERE gravity > 3.7") - assert res.rowcount == 6, res.rowcount - assert res.stats.get("rows_read", 0) == 6, res.stats - - res = opteryx.query("SELECT * FROM sqlite.planets WHERE gravity >= 3.7") - assert res.rowcount == 8, res.rowcount - assert res.stats.get("rows_read", 0) == 8, res.stats - - res = opteryx.query("SELECT * FROM sqlite.planets WHERE name LIKE '%a%'") - assert res.rowcount == 4, res.rowcount - assert res.stats.get("rows_read", 0) == 4, res.stats - - res = opteryx.query("SELECT * FROM sqlite.planets WHERE id > gravity") - assert res.rowcount == 2, res.rowcount - assert res.stats.get("rows_read", 0) == 9, res.stats +test_cases = [ + ("SELECT * FROM sqlite.planets WHERE name = 'Mercury';", 1, 1), + ("SELECT * FROM sqlite.planets WHERE name = 'Mercury' AND gravity = 3.7;", 1, 1), + ( + "SELECT * FROM sqlite.planets WHERE name = 'Mercury' AND gravity = 3.7 AND escapeVelocity = 5.0;", + 0, + 0, + ), + ("SELECT * FROM sqlite.planets WHERE gravity = 3.7 AND name IN ('Mercury', 'Venus');", 1, 2), + ("SELECT * FROM sqlite.planets WHERE surfacePressure IS NULL;", 4, 4), + ( + "SELECT * FROM sqlite.planets WHERE orbitalInclination IS FALSE AND name IN ('Earth', 'Mars');", + 1, + 1, + ), + ("SELECT * FROM (SELECT name FROM sqlite.planets) AS $temp WHERE name = 'Earth';", 1, 1), + ("SELECT * FROM sqlite.planets WHERE gravity <= 3.7", 3, 3), + ("SELECT * FROM sqlite.planets WHERE name != 'Earth'", 8, 8), + 
("SELECT * FROM sqlite.planets WHERE name != 'E\"arth'", 9, 9), + ("SELECT * FROM sqlite.planets WHERE gravity != 3.7", 7, 7), + ("SELECT * FROM sqlite.planets WHERE gravity < 3.7", 1, 1), + ("SELECT * FROM sqlite.planets WHERE gravity > 3.7", 6, 6), + ("SELECT * FROM sqlite.planets WHERE gravity >= 3.7", 8, 8), + ("SELECT * FROM sqlite.planets WHERE name LIKE '%a%'", 4, 4), + ("SELECT * FROM sqlite.planets WHERE id > gravity", 2, 9), +] + + +import pytest + + +@pytest.mark.parametrize("statement,expected_rowcount,expected_rows_read", test_cases) +def test_predicate_pushdown_postgres_parameterized( + statement, expected_rowcount, expected_rows_read +): + res = opteryx.query(statement) + assert res.rowcount == expected_rowcount, f"Expected {expected_rowcount}, got {res.rowcount}" + assert ( + res.stats.get("rows_read", 0) == expected_rows_read + ), f"Expected {expected_rows_read}, got {res.stats.get('rows_read', 0)}" if __name__ == "__main__": # pragma: no cover - from tests.tools import run_tests - - run_tests() + import shutil + + from tests.tools import trunc_printable + + start_suite = time.monotonic_ns() + passed = 0 + failed = 0 + + width = shutil.get_terminal_size((80, 20))[0] - 15 + + print(f"RUNNING BATTERY OF {len(test_cases)} TESTS") + for index, (statement, returned_rows, read_rows) in enumerate(test_cases): + print( + f"\033[38;2;255;184;108m{(index + 1):04}\033[0m" + f" {trunc_printable(format_sql(statement), width - 1)}", + end="", + flush=True, + ) + try: + start = time.monotonic_ns() + test_predicate_pushdown_postgres_parameterized(statement, returned_rows, read_rows) + print( + f"\033[38;2;26;185;67m{str(int((time.monotonic_ns() - start)/1e6)).rjust(4)}ms\033[0m ✅", + end="", + ) + passed += 1 + if failed > 0: + print(" \033[0;31m*\033[0m") + else: + print() + except Exception as err: + print(f"\033[0;31m{str(int((time.monotonic_ns() - start)/1e6)).rjust(4)}ms ❌ *\033[0m") + print(">", err) + failed += 1 + + print("--- ✅ \033[0;32mdone\033[0m") + + 
if failed > 0: + print("\n\033[38;2;139;233;253m\033[3mFAILURES\033[0m") + + print( + f"\n\033[38;2;139;233;253m\033[3mCOMPLETE\033[0m ({((time.monotonic_ns() - start_suite) / 1e9):.2f} seconds)\n" + f" \033[38;2;26;185;67m{passed} passed ({(passed * 100) // (passed + failed)}%)\033[0m\n" + f" \033[38;2;255;121;198m{failed} failed\033[0m" + ) diff --git a/tests/sql_battery/test_results_battery.py b/tests/sql_battery/test_results_battery.py index 186ace134..af835f1df 100644 --- a/tests/sql_battery/test_results_battery.py +++ b/tests/sql_battery/test_results_battery.py @@ -25,6 +25,7 @@ import orjson import opteryx +from opteryx.utils.formatter import format_sql OS_SEP = os.sep @@ -57,7 +58,7 @@ def test_results_tests(test): ), f"Outcome:\n{printable_result}\nExpected:\n{printable_expected}" -if __name__ == "__main__": # pragma: no cover +if __name__ == "__dmain__": # pragma: no cover import shutil import time @@ -77,3 +78,73 @@ def test_results_tests(test): print(f"\033[0;32m{str(int((time.monotonic_ns() - start)/1000000)).rjust(4)}ms\033[0m ✅") print("--- ✅ \033[0;32mdone\033[0m") + + +if __name__ == "__main__": # pragma: no cover + """ + Running in the IDE we do some formatting - it's not functional but helps + when reading the outputs. 
+ """ + + import shutil + import time + + from tests.tools import trunc_printable + + start_suite = time.monotonic_ns() + + width = shutil.get_terminal_size((80, 20))[0] - 45 + + passed = 0 + failed = 0 + + nl = "\n" + + failures = [] + + print(f"RUNNING BATTERY OF {len(RESULTS_TESTS)} RESULTS TESTS") + for index, test in enumerate(RESULTS_TESTS): + + printable = test["statement"] + test_id = test["file"].split(OS_SEP)[-1].split(".")[0][0:25].ljust(25) + if hasattr(printable, "decode"): + printable = printable.decode() + print( + f"\033[38;2;255;184;108m{(index + 1):04}\033[0m", + f"\033[0;35m{test_id}\033[0m", + f" {trunc_printable(format_sql(printable), width - 1)}", + end="", + flush=True, + ) + try: + start = time.monotonic_ns() + test_results_tests(test) + print( + f"\033[38;2;26;185;67m{str(int((time.monotonic_ns() - start)/1e6)).rjust(4)}ms\033[0m ✅", + end="", + ) + passed += 1 + if failed > 0: + print(" \033[0;31m*\033[0m") + else: + print() + except Exception as err: + print(f"\033[0;31m{str(int((time.monotonic_ns() - start)/1e6)).rjust(4)}ms ❌ *\033[0m") + print(">", err) + failed += 1 + failures.append((test_id, test["statement"], err)) + + print("--- ✅ \033[0;32mdone\033[0m") + + if failed > 0: + print("\n\033[38;2;139;233;253m\033[3mFAILURES\033[0m") + for test, statement, err in failures: + print( + f"\033[38;2;26;185;67m{test}\033[0m\n{format_sql(statement)}\n\033[38;2;255;121;198m{err}\033[0m\n" + ) + + print( + f"\n\033[38;2;139;233;253m\033[3mCOMPLETE\033[0m ({((time.monotonic_ns() - start_suite) / 1e9):.2f} seconds)\n" + f" \033[38;2;26;185;67m{passed} passed ({(passed * 100) // (passed + failed)}%)\033[0m\n" + f" \033[38;2;255;121;198m{failed} failed\033[0m" + ) diff --git a/tests/sql_battery/tests/planner.run_tests_disabled b/tests/sql_battery/tests/planner.run_tests_disabled index bb2497dee..a1b53569a 100644 --- a/tests/sql_battery/tests/planner.run_tests_disabled +++ b/tests/sql_battery/tests/planner.run_tests_disabled @@ -1,3 +1,3 @@ SELECT 
* FROM $planets UNION SELECT * FROM $planets; SELECT * FROM $planets LEFT ANTI JOIN $satellites ON id = id; -# EXPLAIN ANALYZE FORMAT JSON SELECT * FROM $planets AS a INNER JOIN (SELECT id FROM $planets) AS b USING (id); \ No newline at end of file +EXPLAIN ANALYZE FORMAT JSON SELECT * FROM $planets AS a INNER JOIN (SELECT id FROM $planets) AS b USING (id); \ No newline at end of file diff --git a/tests/sql_battery/tests/regression.run_tests b/tests/sql_battery/tests/regression.run_tests index 888d2e562..e537ea252 100644 --- a/tests/sql_battery/tests/regression.run_tests +++ b/tests/sql_battery/tests/regression.run_tests @@ -55,7 +55,7 @@ SELECT DATEDIFF('days', TIMESTAMP "2022-01-02", TIMESTAMP "2010-10-01") FROM $as SELECT DATEDIFF('days', TIMESTAMP("2022-01-02"), CAST("2010-10-01" AS TIMESTAMP)); # [TEMPORAL FILTER EXTRACTION PROBLEMS] -SET @planet = 'Saturn'; SELECT name AS nom, bigsats.occurances, smallsats.occurances FROM (SELECT DISTINCT id as planetId, name FROM $planets WHERE name = @planet) as planets LEFT JOIN (SELECT planetId, COUNT(*) AS occurances FROM $satellites FOR DATES BETWEEN '2022-01-01' AND TODAY WHERE gm > 10 GROUP BY planetId) AS bigsats ON bigsats.planetId = planets.planetId LEFT JOIN (SELECT planetId, COUNT(*) AS occurances FROM $satellites FOR DATES IN LAST_MONTH WHERE gm < 10 GROUP BY planetId) as smallsats ON smallsats.planetId = planets.planetId; +SET @planet = 'Saturn'; SELECT name AS nom, bigsats.occurances, smallsats.occurances FROM (SELECT DISTINCT id as planetId, name FROM $planets WHERE name = @planet) as planets LEFT JOIN (SELECT planetId, COUNT(*) AS occurances FROM $satellites s1 FOR DATES BETWEEN '2022-01-01' AND TODAY WHERE gm > 10 GROUP BY planetId) AS bigsats ON bigsats.planetId = planets.planetId LEFT JOIN (SELECT planetId, COUNT(*) AS occurances FROM $satellites s2 FOR DATES IN LAST_MONTH WHERE gm < 10 GROUP BY planetId) as smallsats ON smallsats.planetId = planets.planetId; SELECT 'SELECT * FROM $planets FOR TODAY' FROM 
(SELECT 'FOR TODAY') AS SQ CROSS JOIN $planets FOR YESTERDAY; # testing NOTs diff --git a/tests/sql_battery/tests/results/complex_002.results_tests b/tests/sql_battery/tests/results/complex_002.results_tests index 6526641a6..b400685e4 100644 --- a/tests/sql_battery/tests/results/complex_002.results_tests +++ b/tests/sql_battery/tests/results/complex_002.results_tests @@ -1,6 +1,46 @@ { "summary": "Retrieve planets with specific gravity and diameter metrics, along with count and average radius of high GM satellites and average magnitude of visible satellites.", - "statement": "SELECT p.name AS planet_name, p.diameter, high_gm_stats.high_gm_satellites_count, high_gm_stats.avg_high_gm_radius, visible_stats.avg_magnitude FROM $planets p LEFT JOIN (SELECT planetId, COUNT(*) AS high_gm_satellites_count, AVG(radius) AS avg_high_gm_radius FROM $satellites WHERE gm > 5 GROUP BY planetId) high_gm_stats ON p.id = high_gm_stats.planetId LEFT JOIN (SELECT planetId, AVG(magnitude) AS avg_magnitude FROM $satellites WHERE magnitude < 2.0 GROUP BY planetId) visible_stats ON p.id = visible_stats.planetId WHERE p.diameter > 100 AND p.gravity BETWEEN 0.5 AND 2.0 ORDER BY high_gm_stats.high_gm_satellites_count DESC, visible_stats.avg_magnitude ASC;", + "statement": " +SELECT + p.name AS planet_name, + p.diameter, + high_gm_stats.high_gm_satellites_count, + high_gm_stats.avg_high_gm_radius, + visible_stats.avg_magnitude +FROM + $planets p +LEFT JOIN + (SELECT + planetId, + COUNT(*) AS high_gm_satellites_count, + AVG(radius) AS avg_high_gm_radius + FROM + $satellites s1 + WHERE + gm > 5 + GROUP BY + planetId) AS high_gm_stats +ON + p.id = high_gm_stats.planetId +LEFT JOIN + (SELECT + planetId, + AVG(magnitude) AS avg_magnitude + FROM + $satellites s2 + WHERE + magnitude < 2.0 + GROUP BY + planetId) AS visible_stats +ON + p.id = visible_stats.planetId +WHERE + p.diameter > 100 AND + p.gravity BETWEEN 0.5 AND 2.0 +ORDER BY + high_gm_stats.high_gm_satellites_count DESC, + 
visible_stats.avg_magnitude ASC; + ", "result": { "planet_name": ["Pluto"], "p.diameter": [2370], diff --git a/tests/sql_battery/tests/results/complex_003.results_tests b/tests/sql_battery/tests/results/complex_003.results_tests index 841bb1a59..e1fe4988e 100644 --- a/tests/sql_battery/tests/results/complex_003.results_tests +++ b/tests/sql_battery/tests/results/complex_003.results_tests @@ -1,6 +1,51 @@ { "summary": "This query retrieves planets with their orbital periods and diameters, along with counts and averages of their dense and bright satellites. It filters planets based on their distance from the sun and orbital eccentricity.", - "statement": "SELECT pl.name AS planet_name, pl.orbital_period, pl.diameter, dense_moons_stats.total_dense_moons, dense_moons_stats.avg_density, bright_moons_stats.avg_magnitude, bright_moons_stats.total_bright_moons FROM $planets pl LEFT JOIN (SELECT planetId, COUNT(*) AS total_dense_moons, AVG(density) AS avg_density FROM $satellites WHERE density > 2 GROUP BY planetId) dense_moons_stats ON pl.id = dense_moons_stats.planetId LEFT JOIN (SELECT planetId, AVG(magnitude) AS avg_magnitude, COUNT(*) AS total_bright_moons FROM $satellites WHERE magnitude < 5 GROUP BY planetId) bright_moons_stats ON pl.id = bright_moons_stats.planetId WHERE pl.distance_from_sun BETWEEN 100 AND 200 AND pl.orbital_eccentricity < 0.1 ORDER BY dense_moons_stats.total_dense_moons DESC, bright_moons_stats.avg_magnitude ASC LIMIT 10;", + "statement": " +SELECT + pl.name AS planet_name, + pl.orbital_period, + pl.diameter, + dense_moons_stats.total_dense_moons, + dense_moons_stats.avg_density, + bright_moons_stats.avg_magnitude, + bright_moons_stats.total_bright_moons +FROM + $planets pl +LEFT JOIN + (SELECT + planetId, + COUNT(*) AS total_dense_moons, + AVG(density) AS avg_density + FROM + $satellites s1 + WHERE + density > 2 + GROUP BY + planetId) AS dense_moons_stats +ON + pl.id = dense_moons_stats.planetId +LEFT JOIN + (SELECT + planetId, + AVG(magnitude) 
AS avg_magnitude, + COUNT(*) AS total_bright_moons + FROM + $satellites s2 + WHERE + magnitude < 5 + GROUP BY + planetId) AS bright_moons_stats +ON + pl.id = bright_moons_stats.planetId +WHERE + pl.distance_from_sun BETWEEN 100 AND 200 AND + pl.orbital_eccentricity < 0.1 +ORDER BY + dense_moons_stats.total_dense_moons DESC, + bright_moons_stats.avg_magnitude ASC +LIMIT 10; + + ", "result": { "bright_moons_stats.avg_magnitude": [-12.74, null], "bright_moons_stats.total_bright_moons": [1, null], diff --git a/tests/sql_battery/tests/results/complex_004.results_tests b/tests/sql_battery/tests/results/complex_004.results_tests new file mode 100644 index 000000000..bc44d988b --- /dev/null +++ b/tests/sql_battery/tests/results/complex_004.results_tests @@ -0,0 +1,30 @@ +{ + "summary": "This was an AI generated test case to perform a non-trivial query", + "statement": " +SELECT + p.name AS planet_name, + p.density AS planet_density, + p.gravity AS planet_gravity, + p.meanTemperature AS mean_temperature, + SUM(CASE WHEN s.albedo > 0.3 THEN 1 ELSE 0 END) AS high_albedo_satellites, + AVG(CASE WHEN s.magnitude < 2 THEN s.magnitude ELSE NULL END) AS avg_visible_magnitude +FROM + $planets p +JOIN + $satellites s ON p.id = s.planetId +WHERE + p.escapeVelocity > 0 AND p.surfacePressure < 0.1 +GROUP BY + p.name, p.density, p.gravity, p.meanTemperature +ORDER BY + high_albedo_satellites DESC, avg_visible_magnitude; +", + "result": { + "planet_name": ["Pluto", "Mars"], + "planet_density": [2095, 3933], + "planet_gravity": [0.7, 0.3], + "mean_temperature": [-225, -63], + "high_albedo_satellites": [5, 0], + "avg_visible_magnitude": [null, null], + } +} diff --git a/tests/sql_battery/tests/results/complex_005.results_tests b/tests/sql_battery/tests/results/complex_005.results_tests new file mode 100644 index 000000000..6d7eb06ea --- /dev/null +++ b/tests/sql_battery/tests/results/complex_005.results_tests @@ -0,0 +1,59 @@ +{ + "summary": "This was an AI generated test case to perform a
non-trivial query", + "statement": " +SELECT + p.name AS planet_name, + p.density AS planet_density, + p.gravity AS planet_gravity, + p.orbitalPeriod AS orbital_period, + satellite_data.total_satellites, + satellite_data.avg_density, + satellite_data.max_radius, +FROM + $planets p +JOIN + (SELECT + planetId, + COUNT(*) AS total_satellites, + AVG(density) AS avg_density, + MAX(radius) AS max_radius + FROM + $satellites as s1 + WHERE + gm > 1.0 + GROUP BY + planetId + HAVING + COUNT(*) > 2) AS satellite_data ON p.id = satellite_data.planetId +LEFT JOIN + (SELECT + planetId, + AVG(radius) AS avg_radius, + MIN(id) AS min_magnitude, + AVG(magnitude) as avg_magnitude + FROM + $satellites as s2 + GROUP BY + planetId + HAVING + AVG(magnitude) < 25) AS orbital_data ON p.id = orbital_data.planetId +WHERE + p.gravity BETWEEN 0 AND 10 AND + p.orbitalPeriod > 10000 AND + orbital_data.avg_radius > 0 +ORDER BY + satellite_data.total_satellites DESC, + satellite_data.avg_density, + orbital_data.min_magnitude ASC; +", + "result": { + "planet_name": ["Saturn", "Uranus"], + "planet_density": [687, 1271], + "planet_gravity": [9.0, 8.7], + "orbital_period": [10747.0, 30589.0], + "satellite_data.total_satellites": [7, 5], + "satellite_data.avg_density": [1.3435714285714284, 1.4972], + "satellite_data.max_radius": [2574.73, 788.9], + "orbital_data.avg_magnitude ": [20.75272727272727, 21.376666666666665] + } +} diff --git a/tests/sql_battery/tests/results/regression_001.results_tests b/tests/sql_battery/tests/results/regression_001.results_tests index 827350eda..92363f8a1 100644 --- a/tests/sql_battery/tests/results/regression_001.results_tests +++ b/tests/sql_battery/tests/results/regression_001.results_tests @@ -1,5 +1,47 @@ { "summary": "This has caught a few different problems", - "statement": "SET @planet = 'Saturn'; SELECT name AS nom, bigsats.occurances, smallsats.occurances FROM (SELECT DISTINCT id as planetId, name FROM $planets WHERE name = @planet) as planets LEFT JOIN (SELECT
planetId, COUNT(*) AS occurances FROM $satellites FOR DATES BETWEEN '2022-01-01' AND TODAY WHERE gm > 10 GROUP BY planetId) AS bigsats ON bigsats.planetId = planets.planetId LEFT JOIN (SELECT planetId, COUNT(*) AS occurances FROM $satellites FOR DATES IN LAST_MONTH WHERE gm < 10 GROUP BY planetId) as smallsats ON smallsats.planetId = planets.planetId;", + "statement": " +SET @planet = 'Saturn'; + +SELECT + planets.name AS nom, + bigsats.occurances AS big_satellites_occurances, + smallsats.occurances AS small_satellites_occurances +FROM + (SELECT DISTINCT + id AS planetId, + name + FROM + $planets + WHERE + name = @planet) AS planets +LEFT JOIN + (SELECT + planetId, + COUNT(*) AS occurances + FROM + $satellites s1 + FOR DATES BETWEEN '2022-01-01' AND TODAY + WHERE + gm > 10 + GROUP BY + planetId) AS bigsats +ON + bigsats.planetId = planets.planetId +LEFT JOIN + (SELECT + planetId, + COUNT(*) AS occurances + FROM + $satellites s2 + FOR DATES IN LAST_MONTH + WHERE + gm < 10 + GROUP BY + planetId) AS smallsats +ON + smallsats.planetId = planets.planetId; + + ", "result": {"nom": ["Saturn"], "bigsats.occurances":[5], "smallsats.occurances":[56]} } \ No newline at end of file diff --git a/tests/sql_battery/tests/v2_planner.run_tests b/tests/sql_battery/tests/v2_planner.run_tests index 44face5f6..f28658566 100644 --- a/tests/sql_battery/tests/v2_planner.run_tests +++ b/tests/sql_battery/tests/v2_planner.run_tests @@ -24,10 +24,10 @@ SELECT * FROM $planets AS P RIGHT ANTI JOIN $satellites AS S USING(id); SELECT * FROM $planets AS P RIGHT SEMI JOIN $satellites AS S USING(id); # EXPLAIN FORMAT -# EXPLAIN ANALYZE FORMAT JSON SELECT * FROM $planets AS a INNER JOIN (SELECT id FROM $planets) AS b USING (id); +EXPLAIN ANALYZE FORMAT JSON SELECT * FROM $planets AS a INNER JOIN (SELECT id FROM $planets) AS b USING (id); # DISTINCT ON SELECT DISTINCT ON (planetId) planetId, name FROM $satellites; # CONDITIONS IN AGGREGATES -# SELECT SUM(DISTINCT id ORDER BY id) FROM $planets \ No 
newline at end of file +SELECT SUM(DISTINCT id ORDER BY id) FROM $planets \ No newline at end of file