Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
joocer committed Sep 26, 2024
1 parent b3de201 commit aa92c92
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 27 deletions.
15 changes: 5 additions & 10 deletions opteryx/operators/heap_sort_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,19 +90,14 @@ def execute(self) -> Generator[pyarrow.Table, None, None]: # pragma: no cover
for morsel in morsels.execute():
start_time = time.time_ns()

if morsel.num_rows > self.limit:
# not much point doing this here if we're not eliminating rows
morsel = morsel.sort_by(mapped_order)
morsel = morsel.slice(offset=0, length=self.limit)

if table:
# Concatenate the current morsel with the previously accumulated table
morsel = concat_tables([morsel, table], promote_options="permissive")
# Concatenate the accumulated table with the new morsel
table = concat_tables([table, morsel], promote_options="permissive")
else:
table = morsel

# Sort and slice the concatenated table to maintain the limit
morsel = morsel.sort_by(mapped_order)
morsel = morsel.slice(offset=0, length=self.limit)
table = morsel
table = table.sort_by(mapped_order).slice(offset=0, length=self.limit)

self.statistics.time_heap_sorting += time.time_ns() - start_time

Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import platform
from distutils.sysconfig import get_config_var
from typing import Any
from typing import Dict

Expand All @@ -9,8 +10,6 @@
from setuptools import find_packages
from setuptools import setup
from setuptools_rust import RustExtension
from distutils.sysconfig import get_config_var


LIBRARY = "opteryx"

Expand Down
16 changes: 1 addition & 15 deletions tests/fuzzing/test_sql_fuzzer_single_table_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,24 +184,10 @@ def test_sql_fuzzing_single_table(i):
# Log failing statement and error for analysis
raise e
print()
return execution_time, statement

if __name__ == "__main__": # pragma: no cover
import heapq

top_n: int = 5
slowest_executions = []

for i in range(TEST_CYCLES):
et, st = test_sql_fuzzing_single_table(i)

# Use a heap to maintain only the top N slowest executions
if len(slowest_executions) < top_n:
# If we have fewer than `top_n` elements, add the current result
heapq.heappush(slowest_executions, (et, i, st))
else:
# If we already have `top_n` elements, replace the smallest one if the current one is larger
heapq.heappushpop(slowest_executions, (et, i, st))
test_sql_fuzzing_single_table(i)

print("✅ okay\n")
print("\n".join(f"{s[1]:03} {s[0]:.4f} {format_sql(s[2])}" for s in sorted(slowest_executions, key=lambda x: x[0], reverse=True)))

0 comments on commit aa92c92

Please sign in to comment.