prrao87 · prrao87 · Oct 27, 2023 · Oct 27, 2023 · Oct 27, 2023 · Oct 27, 2023
diff --git a/cities.parquet b/cities.parquet
diff --git a/data/output/nodes/persons.parquet b/data/output/nodes/persons.parquet
diff --git a/kuzudb/benchmark_query.py b/kuzudb/benchmark_query.py
@@ -3,20 +3,15 @@
 `pip install pytest-benchmark`
 """
 import pytest
-from dotenv import load_dotenv
 import kuzu
 
 import query
 
-load_dotenv()
-
 
 @pytest.fixture(scope="session")
 def connection():
-    db = kuzu.Database(f"./social_network")
+    db = kuzu.Database("social_network")
     conn = kuzu.Connection(db)
-    # For a fairer comparison with Neo4j, where “Transactions are single-threaded, confined, and independent.”
-    # conn.set_max_threads_for_exec(1)
     yield conn
 
 
@@ -65,9 +60,9 @@ def test_benchmark_query4(benchmark, connection):
     assert result[0]["countries"] == "United States"
     assert result[1]["countries"] == "Canada"
     assert result[2]["countries"] == "United Kingdom"
-    assert result[0]["personCounts"] == 30431
-    assert result[1]["personCounts"] == 3064
-    assert result[2]["personCounts"] == 1870
+    assert result[0]["personCounts"] == 30453
+    assert result[1]["personCounts"] == 3062
+    assert result[2]["personCounts"] == 1865
 
 
 def test_benchmark_query5(benchmark, connection):
@@ -136,5 +131,4 @@ def test_benchmark_query9(benchmark, connection):
     result = result.to_dicts()
 
     assert len(result) == 1
-    assert result[0]["numPaths"] == 45558131
-
+    assert result[0]["numPaths"] == 45455419
diff --git a/kuzudb/build_graph.py b/kuzudb/build_graph.py
@@ -37,9 +37,9 @@ def create_city_node_table(conn: Connection) -> None:
                 city STRING,
                 state STRING,
                 country STRING,
-                lat FLOAT,
-                lon FLOAT,
-                population INT64,
+                lat DOUBLE,
+                lon DOUBLE,
+                population INT32,
                 PRIMARY KEY (id)
             )
         """

diff --git a/kuzudb/query.py b/kuzudb/query.py
@@ -18,7 +18,7 @@ def run_query1(conn: Connection) -> None:
     """
     print(f"\nQuery 1:\n {query}")
     response = conn.execute(query)
-    result = pl.from_arrow(response.get_as_arrow(chunk_size=1000))
+    result = pl.from_arrow(response.get_as_arrow(chunk_size=10_000))
     print(f"Top 3 most-followed persons:\n{result}")
     return result
 
@@ -34,7 +34,7 @@ def run_query2(conn: Connection) -> None:
     """
     print(f"\nQuery 2:\n {query}")
     response = conn.execute(query)
-    result = pl.from_arrow(response.get_as_arrow(chunk_size=1000))
+    result = pl.from_arrow(response.get_as_arrow(chunk_size=10_000))
     print(f"City in which most-followed person lives:\n{result}")
     return result
 
@@ -49,7 +49,7 @@ def run_query3(conn: Connection, params: list[tuple[str, Any]]) -> None:
     """
     print(f"\nQuery 3:\n {query}")
     response = conn.execute(query, parameters=params)
-    result = pl.from_arrow(response.get_as_arrow(chunk_size=1000))
+    result = pl.from_arrow(response.get_as_arrow(chunk_size=10_000))
     print(f"Cities with lowest average age in {params['country']}:\n{result}")
     return result
 
@@ -64,7 +64,7 @@ def run_query4(conn: Connection, params: list[tuple[str, Any]]) -> None:
     """
     print(f"\nQuery 4:\n {query}")
     response = conn.execute(query, parameters=params)
-    result = pl.from_arrow(response.get_as_arrow(chunk_size=1000))
+    result = pl.from_arrow(response.get_as_arrow(chunk_size=10_000))
     print(f"Persons between ages {params['age_lower']}-{params['age_upper']} in each country:\n{result}")
     return result
 
@@ -82,7 +82,7 @@ def run_query5(conn: Connection, params: list[tuple[str, Any]]) -> None:
     """
     print(f"\nQuery 5:\n {query}")
     response = conn.execute(query, parameters=params)
-    result = pl.from_arrow(response.get_as_arrow(chunk_size=1000))
+    result = pl.from_arrow(response.get_as_arrow(chunk_size=10_000))
     print(
         f"Number of {params['gender']} users in {params['city']}, {params['country']} who have an interest in {params['interest']}:\n{result}"
     )
@@ -102,7 +102,7 @@ def run_query6(conn: Connection, params: list[tuple[str, Any]]) -> None:
     """
     print(f"\nQuery 6:\n {query}")
     response = conn.execute(query, parameters=params)
-    result = pl.from_arrow(response.get_as_arrow(chunk_size=1000))
+    result = pl.from_arrow(response.get_as_arrow(chunk_size=10_000))
     print(
         f"City with the most {params['gender']} users who have an interest in {params['interest']}:\n{result}"
     )
@@ -122,7 +122,7 @@ def run_query7(conn: Connection, params: list[tuple[str, Any]]) -> None:
     """
     print(f"\nQuery 7:\n {query}")
     response = conn.execute(query, parameters=params)
-    result = pl.from_arrow(response.get_as_arrow(chunk_size=1000))
+    result = pl.from_arrow(response.get_as_arrow(chunk_size=10_000))
     print(
         f"""
         State in {params['country']} with the most users between ages {params['age_lower']}-{params['age_upper']} who have an interest in {params['interest']}:\n{result}
@@ -139,7 +139,7 @@ def run_query8(conn: Connection) -> None:
     """
     print(f"\nQuery 8:\n {query}")
     response = conn.execute(query)
-    result = pl.from_arrow(response.get_as_arrow(chunk_size=1000))
+    result = pl.from_arrow(response.get_as_arrow(chunk_size=10_000))
     print(
         f"""
         Number of second-degree paths:\n{result}
@@ -158,7 +158,7 @@ def run_query9(conn: Connection, params: list[tuple[str, Any]]) -> None:
 
     print(f"\nQuery 9:\n {query}")
     response = conn.execute(query, parameters=params)
-    result = pl.from_arrow(response.get_as_arrow(chunk_size=1000))
+    result = pl.from_arrow(response.get_as_arrow(chunk_size=10_000))
     print(
         f"""
         Number of paths through persons below {params['age_1']} to persons above {params['age_2']}:\n{result}
@@ -200,7 +200,5 @@ def main(conn: Connection) -> None:
     DB_NAME = "social_network"
     db = kuzu.Database(f"./{DB_NAME}")
     CONNECTION = kuzu.Connection(db)
-    # For a fairer comparison with Neo4j, where “Transactions are single-threaded, confined, and independent.”
-    CONNECTION.set_max_threads_for_exec(1)
 
     main(CONNECTION)
diff --git a/neo4j/.env.example b/neo4j/.env.example
@@ -1,3 +1,3 @@
-NEO4J_VERSION = "5.12.0"
+NEO4J_VERSION = "5.13.0"
 NEO4J_USER = "neo4j"
 NEO4J_PASSWORD =
diff --git a/neo4j/benchmark_query.py b/neo4j/benchmark_query.py
@@ -68,9 +68,9 @@ def test_benchmark_query4(benchmark, session):
     assert result[0]["countries"] == "United States"
     assert result[1]["countries"] == "Canada"
     assert result[2]["countries"] == "United Kingdom"
-    assert result[0]["personCounts"] == 30431
-    assert result[1]["personCounts"] == 3064
-    assert result[2]["personCounts"] == 1870
+    assert result[0]["personCounts"] == 30453
+    assert result[1]["personCounts"] == 3062
+    assert result[2]["personCounts"] == 1865
 
 
 def test_benchmark_query5(benchmark, session):
@@ -114,4 +114,4 @@ def test_benchmark_query9(benchmark, session):
     result = result.to_dicts()
 
     assert len(result) == 1
-    assert result[0]["numPaths"] == 45558131
+    assert result[0]["numPaths"] == 45455419
diff --git a/requirements.txt b/requirements.txt
@@ -1,7 +1,8 @@
-faker~=19.2.0
+faker~=19.12.0
 polars~=0.19.0
 numpy>=1.25.0
-kuzu==0.0.9
+pyarrow~=13.0.0
+kuzu==0.0.11
 neo4j~=5.13.0
 python-dotenv>=1.0.0
 codetiming>=1.4.0