From ed86456ad936c1a2b8cbbebd1bc16390c3b589cc Mon Sep 17 00:00:00 2001 From: Ville Puuska <40150442+VillePuuska@users.noreply.github.com> Date: Sat, 21 Sep 2024 16:08:15 +0000 Subject: [PATCH] Polars read and scan Hive partitioned table: path no longer needs a glob pattern after version bump --- uchelper/dataframe.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/uchelper/dataframe.py b/uchelper/dataframe.py index a50bb1f..926e456 100644 --- a/uchelper/dataframe.py +++ b/uchelper/dataframe.py @@ -196,11 +196,8 @@ def read_table(table: Table) -> pl.DataFrame: if len(partition_cols) == 0: df = pl.read_parquet(source=path) else: - # TODO: There HAS to be a nicer way to do this. Try with Polars >1.0? df = pl.read_parquet( - source=os.path.join( - path, *["**" for _ in range(len(partition_cols))], "*.parquet" - ), + source=path, hive_partitioning=True, hive_schema={ col.name: uc_type_to_polars_type(col.data_type) @@ -239,11 +236,8 @@ def scan_table(table: Table) -> pl.LazyFrame: if len(partition_cols) == 0: df = pl.scan_parquet(source=path) else: - # TODO: There HAS to be a nicer way to do this. Try with Polars >1.0? df = pl.scan_parquet( - source=os.path.join( - path, *["**" for _ in range(len(partition_cols))], "*.parquet" - ), + source=path, hive_partitioning=True, hive_schema={ col.name: uc_type_to_polars_type(col.data_type)