From 25e42c8670e2a2db1f74f6a1833217b0321bc8d0 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Mon, 9 Sep 2024 16:00:50 -0700 Subject: [PATCH 01/26] update dependencies --- Cargo.lock | 773 +++++++++++++++++++++++-------------------- Cargo.toml | 9 +- src/common/schema.rs | 32 +- 3 files changed, 435 insertions(+), 379 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d507321d..bc6c9a5a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,18 +4,18 @@ version = 3 [[package]] name = "addr2line" -version = "0.22.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +checksum = "f5fb1d8e4442bd405fdfd1dacb42792696b0cf9cb15882e5d097b742a676d375" dependencies = [ "gimli", ] [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "adler32" @@ -84,9 +84,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8" [[package]] name = "apache-avro" @@ -124,15 +124,15 @@ checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" [[package]] name = "arrayvec" -version = "0.7.4" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05048a8932648b63f21c37d88b552ccc8a65afb6dfe9fc9f30ce79174c2e7a85" +checksum = "45aef0d9cf9a039bf6cd1acc451b137aca819977b0928dece52bd92811b640ba" dependencies = [ "arrow-arith", "arrow-array", @@ -152,9 +152,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d8a57966e43bfe9a3277984a14c24ec617ad874e4c0e1d2a1b083a39cfbf22c" +checksum = "03675e42d1560790f3524800e41403b40d0da1c793fe9528929fde06d8c7649a" dependencies = [ "arrow-array", "arrow-buffer", @@ -167,9 +167,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f4a9468c882dc66862cef4e1fd8423d47e67972377d85d80e022786427768c" +checksum = "cd2bf348cf9f02a5975c5962c7fa6dee107a2009a7b41ac5fb1a027e12dc033f" dependencies = [ "ahash", "arrow-buffer", @@ -184,9 +184,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c975484888fc95ec4a632cdc98be39c085b1bb518531b0c80c5d462063e5daa1" +checksum = "3092e37715f168976012ce52273c3989b5793b0db5f06cbaa246be25e5f0924d" dependencies = [ "bytes", "half", @@ -195,9 +195,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da26719e76b81d8bc3faad1d4dbdc1bcc10d14704e63dc17fc9f3e7e1e567c8e" +checksum = "7ce1018bb710d502f9db06af026ed3561552e493e989a79d0d0f5d9cf267a785" dependencies = [ "arrow-array", "arrow-buffer", @@ -216,9 +216,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c13c36dc5ddf8c128df19bab27898eea64bf9da2b555ec1cd17a8ff57fba9ec2" +checksum = "fd178575f45624d045e4ebee714e246a05d9652e41363ee3f57ec18cca97f740" dependencies = [ "arrow-array", "arrow-buffer", @@ -235,9 +235,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd9d6f18c65ef7a2573ab498c374d8ae364b4a4edf67105357491c031f716ca5" +checksum = "4e4ac0c4ee79150afe067dc4857154b3ee9c1cd52b5f40d59a77306d0ed18d65" dependencies = [ "arrow-buffer", "arrow-schema", @@ -247,9 +247,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e786e1cdd952205d9a8afc69397b317cfbb6e0095e445c69cda7e8da5c1eeb0f" +checksum = "bb307482348a1267f91b0912e962cd53440e5de0f7fb24c5f7b10da70b38c94a" dependencies = [ "arrow-array", "arrow-buffer", @@ -262,9 +262,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb22284c5a2a01d73cebfd88a33511a3234ab45d66086b2ca2d1228c3498e445" +checksum = "d24805ba326758effdd6f2cbdd482fcfab749544f21b134701add25b33f474e6" dependencies = [ "arrow-array", "arrow-buffer", @@ -282,9 +282,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42745f86b1ab99ef96d1c0bcf49180848a64fe2c7a7a0d945bc64fa2b21ba9bc" +checksum = "644046c479d80ae8ed02a7f1e1399072ea344ca6a7b0e293ab2d5d9ed924aa3b" dependencies = [ "arrow-array", "arrow-buffer", @@ -297,9 +297,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd09a518c602a55bd406bcc291a967b284cfa7a63edfbf8b897ea4748aad23c" +checksum = "a29791f8eb13b340ce35525b723f5f0df17ecb955599e11f65c2a94ab34e2efb" dependencies = [ "ahash", "arrow-array", @@ -311,18 +311,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e972cd1ff4a4ccd22f86d3e53e835c2ed92e0eea6a3e8eadb72b4f1ac802cf8" +checksum = "c85320a3a2facf2b2822b57aa9d6d9d55edb8aee0b6b5d3b8df158e503d10858" dependencies = [ "bitflags 2.6.0", ] [[package]] name = "arrow-select" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "600bae05d43483d216fb3494f8c32fdbefd8aa4e1de237e790dbb3d9f44690a3" +checksum = "9cc7e6b582e23855fd1625ce46e51647aa440c20ea2e71b1d748e0839dd73cba" dependencies = [ "ahash", "arrow-array", @@ -334,9 +334,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dc1985b67cb45f6606a248ac2b4a288849f196bab8c657ea5589f47cdd55e6" +checksum = "0775b6567c66e56ded19b87a954b6b1beffbdd784ef95a3a2b03f59570c1d230" dependencies = [ "arrow-array", "arrow-buffer", @@ -375,18 +375,18 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] name = "async-trait" -version = "0.1.81" +version = "0.1.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" +checksum = "a27b8a3a6e1a44fa4c8baf1f653e4172e81486d4941f2237e20dc2d0cf4ddff1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -412,17 +412,17 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" -version = "0.3.73" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", "miniz_oxide", "object", "rustc-demangle", + "windows-targets", ] [[package]] @@ -460,9 +460,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.3" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9ec96fe9a81b5e365f9db71fe00edc4fe4ca2cc7dcb7861f0603012a7caa210" +checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" dependencies = [ "arrayref", "arrayvec", @@ -542,12 +542,13 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.7" +version = "1.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc" +checksum = "b62ac837cdb5cb22e10a256099b4fc502b1dfe560cb282963a974d7abd80e476" dependencies = [ "jobserver", "libc", + "shlex", ] [[package]] @@ -566,7 +567,7 @@ dependencies = [ "iana-time-zone", "num-traits", "serde", - "windows-targets 0.52.6", + "windows-targets", ] [[package]] @@ -593,9 +594,9 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.50" +version = "0.1.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" dependencies = [ "cc", ] @@ -633,9 +634,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" [[package]] name = "core-foundation" @@ -649,9 +650,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "core2" @@ -664,9 +665,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.12" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" dependencies = [ "libc", ] @@ -731,9 +732,9 @@ checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" [[package]] name = "dashmap" -version = "6.0.1" +version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "804c8821570c3f8b70230c2ba75ffa5c0f9a4189b9a432b6656c536712acae28" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ "cfg-if", "crossbeam-utils", @@ -746,8 +747,6 @@ dependencies = [ [[package]] name = "datafusion" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4fd4a99fc70d40ef7e52b243b4a399c3f8d353a40d5ecb200deee05e49c61bb" dependencies = [ "ahash", "apache-avro", @@ -769,6 +768,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", + "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -781,11 +781,11 @@ dependencies = [ "half", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "num-traits", "num_cpus", - "object_store", + "object_store 0.11.0", "parking_lot", "parquet", "paste", @@ -804,8 +804,6 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13b3cfbd84c6003594ae1972314e3df303a27ce8ce755fcea3240c90f4c0529" dependencies = [ "arrow-schema", "async-trait", @@ -813,13 +811,12 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-plan", + "parking_lot", ] [[package]] name = "datafusion-common" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44fdbc877e3e40dcf88cc8f283d9f5c8851f0a3aa07fee657b1b75ac1ad49b9c" dependencies = [ "ahash", "apache-avro", @@ -833,26 +830,25 @@ dependencies = [ "instant", "libc", "num_cpus", - "object_store", + "object_store 0.11.0", "parquet", + "paste", "pyo3", "sqlparser", + "tokio", ] [[package]] name = "datafusion-common-runtime" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7496d1f664179f6ce3a5cbef6566056ccaf3ea4aa72cc455f80e62c1dd86b1" dependencies = [ + "log", "tokio", ] [[package]] name = "datafusion-execution" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799e70968c815b611116951e3dd876aef04bf217da31b72eec01ee6a959336a1" dependencies = [ "arrow", "chrono", @@ -862,7 +858,7 @@ dependencies = [ "futures", "hashbrown", "log", - "object_store", + "object_store 0.11.0", "parking_lot", "rand", "tempfile", @@ -872,8 +868,6 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c1841c409d9518c17971d15c9bae62e629eb937e6fb6c68cd32e9186f8b30d2" dependencies = [ "ahash", "arrow", @@ -881,6 +875,9 @@ dependencies = [ "arrow-buffer", "chrono", "datafusion-common", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", "paste", "serde_json", "sqlparser", @@ -888,11 +885,18 @@ dependencies = [ "strum_macros 0.26.4", ] +[[package]] +name = "datafusion-expr-common" +version = "41.0.0" +dependencies = [ + "arrow", + "datafusion-common", + "paste", +] + [[package]] name = "datafusion-functions" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8e481cf34d2a444bd8fa09b65945f0ce83dc92df8665b761505b3d9f351bebb" dependencies = [ "arrow", "arrow-buffer", @@ -905,7 +909,7 @@ dependencies = [ "datafusion-expr", "hashbrown", "hex", - "itertools 0.12.1", + "itertools 0.13.0", "log", "md-5", "rand", @@ -918,8 +922,6 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b4ece19f73c02727e5e8654d79cd5652de371352c1df3c4ac3e419ecd6943fb" dependencies = [ "ahash", "arrow", @@ -927,17 +929,30 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", "datafusion-physical-expr-common", + "half", "log", "paste", "sqlparser", ] +[[package]] +name = "datafusion-functions-aggregate-common" +version = "41.0.0" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", + "rand", +] + [[package]] name = "datafusion-functions-nested" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1474552cc824e8c9c88177d454db5781d4b66757d4aca75719306b8343a5e8d" dependencies = [ "arrow", "arrow-array", @@ -949,17 +964,26 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", - "itertools 0.12.1", + "datafusion-physical-expr-common", + "itertools 0.13.0", "log", "paste", "rand", ] +[[package]] +name = "datafusion-functions-window" +version = "41.0.0" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr-common", + "log", +] + [[package]] name = "datafusion-optimizer" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "791ff56f55608bc542d1ea7a68a64bdc86a9413f5a381d06a39fd49c2a3ab906" dependencies = [ "arrow", "async-trait", @@ -969,7 +993,7 @@ dependencies = [ "datafusion-physical-expr", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "paste", "regex-syntax", @@ -978,8 +1002,6 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a223962b3041304a3e20ed07a21d5de3d88d7e4e71ca192135db6d24e3365a4" dependencies = [ "ahash", "arrow", @@ -993,12 +1015,14 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", "hashbrown", "hex", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "paste", "petgraph", @@ -1008,13 +1032,11 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db5e7d8532a1601cd916881db87a70b0a599900d23f3db2897d389032da53bc6" dependencies = [ "ahash", "arrow", "datafusion-common", - "datafusion-expr", + "datafusion-expr-common", "hashbrown", "rand", ] @@ -1022,20 +1044,18 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb9c78f308e050f5004671039786a925c3fee83b90004e9fcfd328d7febdcc0" dependencies = [ + "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-physical-expr", "datafusion-physical-plan", + "itertools 0.13.0", ] [[package]] name = "datafusion-physical-plan" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d1116949432eb2d30f6362707e2846d942e491052a206f2ddcb42d08aea1ffe" dependencies = [ "ahash", "arrow", @@ -1050,13 +1070,14 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", "half", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "once_cell", "parking_lot", @@ -1075,15 +1096,15 @@ dependencies = [ "datafusion-substrait", "futures", "mimalloc", - "object_store", + "object_store 0.10.2", "parking_lot", - "prost", - "prost-types", + "prost 0.12.6", + "prost-types 0.12.6", "pyo3", "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.72", + "syn 2.0.77", "tokio", "url", "uuid", @@ -1092,8 +1113,6 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45d0180711165fe94015d7c4123eb3e1cf5fb60b1506453200b8d1ce666bef0" dependencies = [ "arrow", "arrow-array", @@ -1109,17 +1128,15 @@ dependencies = [ [[package]] name = "datafusion-substrait" version = "41.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf0a0055aa98246c79f98f0d03df11f16cb7adc87818d02d4413e3f3cdadbbee" dependencies = [ "arrow-buffer", "async-recursion", "chrono", "datafusion", - "itertools 0.12.1", - "object_store", + "itertools 0.13.0", + "object_store 0.11.0", "pbjson-types", - "prost", + "prost 0.13.2", "substrait", "url", ] @@ -1171,9 +1188,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "fixedbitset" @@ -1193,9 +1210,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.31" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920" +checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253" dependencies = [ "crc32fast", "miniz_oxide", @@ -1272,7 +1289,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -1328,9 +1345,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" +checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64" [[package]] name = "glob" @@ -1340,9 +1357,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" dependencies = [ "atomic-waker", "bytes", @@ -1470,16 +1487,16 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.2" +version = "0.27.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" dependencies = [ "futures-util", "http", "hyper", "hyper-util", "rustls", - "rustls-native-certs", + "rustls-native-certs 0.8.0", "rustls-pki-types", "tokio", "tokio-rustls", @@ -1488,9 +1505,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9" +checksum = "da62f120a8a37763efb0cf8fdf264b884c7b8b9ac8660b900c8661030c00e6ba" dependencies = [ "bytes", "futures-channel", @@ -1541,9 +1558,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.3.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" +checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" dependencies = [ "equivalent", "hashbrown", @@ -1575,18 +1592,9 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "ipnet" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" - -[[package]] -name = "itertools" -version = "0.11.0" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] +checksum = "187674a687eed5fe42285b40c6291f9a01517d415fad1c3cbc6a9f778af7fcd4" [[package]] name = "itertools" @@ -1623,9 +1631,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" dependencies = [ "wasm-bindgen", ] @@ -1702,9 +1710,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.155" +version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" [[package]] name = "libflate" @@ -1830,18 +1838,18 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.7.4" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" dependencies = [ - "adler", + "adler2", ] [[package]] name = "mio" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ "hermit-abi", "libc", @@ -1941,9 +1949,9 @@ dependencies = [ [[package]] name = "object" -version = "0.36.2" +version = "0.36.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e" +checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" dependencies = [ "memchr", ] @@ -1972,7 +1980,28 @@ dependencies = [ "rustls-pemfile", "serde", "serde_json", - "snafu", + "snafu 0.7.5", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "object_store" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25a0c4b3a0e31f8b66f71ad8064521efa773910196e2cde791436f13409f3b45" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools 0.13.0", + "parking_lot", + "percent-encoding", + "snafu 0.8.4", "tokio", "tracing", "url", @@ -2020,14 +2049,14 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-targets 0.52.6", + "windows-targets", ] [[package]] name = "parquet" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e977b9066b4d3b03555c22bdc442f3fadebd96a39111249113087d0edb2691cd" +checksum = "f0fbf928021131daaa57d334ca8e3904fe9ae22f73c56244fc7db9b04eedc3d8" dependencies = [ "ahash", "arrow-array", @@ -2048,7 +2077,7 @@ dependencies = [ "lz4_flex", "num", "num-bigint", - "object_store", + "object_store 0.11.0", "paste", "seq-macro", "snap", @@ -2076,9 +2105,9 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "pbjson" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1030c719b0ec2a2d25a5df729d6cff1acf3cc230bf766f4f97833591f7577b90" +checksum = "c7e6349fa080353f4a597daffd05cb81572a9c031a6d4fff7e504947496fcc68" dependencies = [ "base64 0.21.7", "serde", @@ -2086,27 +2115,27 @@ dependencies = [ [[package]] name = "pbjson-build" -version = "0.6.2" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2580e33f2292d34be285c5bc3dba5259542b083cfad6037b6d70345f24dcb735" +checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" dependencies = [ - "heck 0.4.1", - "itertools 0.11.0", - "prost", - "prost-types", + "heck 0.5.0", + "itertools 0.13.0", + "prost 0.13.2", + "prost-types 0.13.2", ] [[package]] name = "pbjson-types" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18f596653ba4ac51bdecbb4ef6773bc7f56042dc13927910de1684ad3d32aa12" +checksum = "e54e5e7bfb1652f95bc361d76f3c780d8e526b134b85417e774166ee941f0887" dependencies = [ "bytes", "chrono", "pbjson", "pbjson-build", - "prost", + "prost 0.13.2", "prost-build", "serde", ] @@ -2182,7 +2211,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -2220,12 +2249,12 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.20" +version = "0.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" +checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" dependencies = [ "proc-macro2", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -2244,27 +2273,37 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.12.6", +] + +[[package]] +name = "prost" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2ecbe40f08db5c006b5764a2645f7f3f141ce756412ac9e1dd6087e6d32995" +dependencies = [ + "bytes", + "prost-derive 0.13.2", ] [[package]] name = "prost-build" -version = "0.12.6" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" +checksum = "f8650aabb6c35b860610e9cff5dc1af886c9e25073b7b1712a68972af4281302" dependencies = [ "bytes", "heck 0.5.0", - "itertools 0.12.1", + "itertools 0.13.0", "log", "multimap", "once_cell", "petgraph", "prettyplease", - "prost", - "prost-types", + "prost 0.13.2", + "prost-types 0.13.2", "regex", - "syn 2.0.72", + "syn 2.0.77", "tempfile", ] @@ -2278,7 +2317,20 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", +] + +[[package]] +name = "prost-derive" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acf0c195eebb4af52c752bec4f52f645da98b6e92077a04110c7f349477ae5ac" +dependencies = [ + "anyhow", + "itertools 0.13.0", + "proc-macro2", + "quote", + "syn 2.0.77", ] [[package]] @@ -2287,7 +2339,16 @@ version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" dependencies = [ - "prost", + "prost 0.12.6", +] + +[[package]] +name = "prost-types" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60caa6738c7369b940c3d49246a8d1749323674c65cb13010134f5c9bad5b519" +dependencies = [ + "prost 0.13.2", ] [[package]] @@ -2301,15 +2362,15 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.21.2" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" +checksum = "831e8e819a138c36e212f3af3fd9eeffed6bf1510a805af35b0edee5ffa59433" dependencies = [ "cfg-if", "indoc", "libc", "memoffset", - "parking_lot", + "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", @@ -2319,9 +2380,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.21.2" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" +checksum = "1e8730e591b14492a8945cdff32f089250b05f5accecf74aeddf9e8272ce1fa8" dependencies = [ "once_cell", "target-lexicon", @@ -2329,9 +2390,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.21.2" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" +checksum = "5e97e919d2df92eb88ca80a037969f44e5e70356559654962cbb3316d00300c6" dependencies = [ "libc", "pyo3-build-config", @@ -2339,34 +2400,34 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.21.2" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c" +checksum = "eb57983022ad41f9e683a599f2fd13c3664d7063a3ac5714cae4b7bee7d3f206" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] name = "pyo3-macros-backend" -version = "0.21.2" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c" +checksum = "ec480c0c51ddec81019531705acac51bcdbeae563557c982aa8263bb96880372" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] name = "quad-rand" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88" +checksum = "b76f1009795ca44bb5aaae8fd3f18953e209259c33d9b059b1f53d58ab7511db" [[package]] name = "quick-xml" @@ -2380,9 +2441,9 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.3" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b22d8e7369034b9a7132bc2008cac12f2013c8132b45e0554e6e20e2617f2156" +checksum = "8c7c5fdde3cdae7203427dc4f0a68fe0ed09833edc525a03456b153b79828684" dependencies = [ "bytes", "pin-project-lite", @@ -2398,9 +2459,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba92fb39ec7ad06ca2582c0ca834dfeadcaf06ddfc8e635c80aa7e1c05315fdd" +checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6" dependencies = [ "bytes", "rand", @@ -2415,22 +2476,22 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bffec3605b73c6f1754535084a85229fa8a30f86014e6c81aeec4abb68b0285" +checksum = "4fe68c2e9e1a1234e218683dbdf9f9dfcb094113c5ac2b938dfcb9bab4c4140b" dependencies = [ "libc", "once_cell", "socket2", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -2521,9 +2582,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.5" +version = "0.12.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" +checksum = "f8f4955649ef5c38cc7f9e8aa41761d48fb9677197daea9984dc54f56aad5e63" dependencies = [ "base64 0.22.1", "bytes", @@ -2545,7 +2606,7 @@ dependencies = [ "pin-project-lite", "quinn", "rustls", - "rustls-native-certs", + "rustls-native-certs 0.7.3", "rustls-pemfile", "rustls-pki-types", "serde", @@ -2561,7 +2622,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "winreg", + "windows-registry", ] [[package]] @@ -2599,18 +2660,18 @@ checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "3f55e80d50763938498dd5ebb18647174e0c76dc38c5505294bb224624f30f36" dependencies = [ "bitflags 2.6.0", "errno", @@ -2635,9 +2696,22 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.7.1" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a88d6d420651b496bdd98684116959239430022a115c1240e6c3993be0b15fba" +checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" +dependencies = [ + "openssl-probe", + "rustls-pemfile", + "rustls-pki-types", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcaf18a4f2be7326cd874a5fa579fae794320a0f388d365dca7e480e55f83f8a" dependencies = [ "openssl-probe", "rustls-pemfile", @@ -2664,9 +2738,9 @@ checksum = "fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0" [[package]] name = "rustls-webpki" -version = "0.102.6" +version = "0.102.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e6b52d4fda176fd835fdc55a835d4a89b8499cad995885a21149d5ad62f852e" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" dependencies = [ "ring", "rustls-pki-types", @@ -2696,11 +2770,11 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +checksum = "e9aaafd5a2b6e3d657ff009d82fbd630b6bd54dd4eb06f21693925cdf80f9b8b" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2724,7 +2798,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -2773,22 +2847,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.204" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -2799,14 +2873,14 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] name = "serde_json" -version = "1.0.122" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" dependencies = [ "itoa", "memchr", @@ -2816,14 +2890,14 @@ dependencies = [ [[package]] name = "serde_tokenstream" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8790a7c3fe883e443eaa2af6f705952bc5d6e8671a220b9335c8cae92c037e74" +checksum = "64060d864397305347a78851c51588fd283767e7e7589829e8121d65512340f1" dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -2862,6 +2936,12 @@ dependencies = [ "digest", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "siphasher" version = "0.3.11" @@ -2890,7 +2970,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" dependencies = [ "doc-comment", - "snafu-derive", + "snafu-derive 0.7.5", +] + +[[package]] +name = "snafu" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b835cb902660db3415a672d862905e791e54d306c6e8189168c7f3d9ae1c79d" +dependencies = [ + "snafu-derive 0.8.4", ] [[package]] @@ -2905,6 +2994,18 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "snafu-derive" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d1e02fca405f6280643174a50c942219f0bbf4dbf7d480f1dd864d6f211ae5" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.77", +] + [[package]] name = "snap" version = "1.1.1" @@ -2929,9 +3030,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.49.0" +version = "0.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a404d0e14905361b918cb8afdb73605e25c1d5029312bd9785142dcb3aa49e" +checksum = "b2e5b515a2bd5168426033e9efbfd05500114833916f1d5c268f938b4ee130ac" dependencies = [ "log", "sqlparser_derive", @@ -2945,7 +3046,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -2979,7 +3080,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -2992,30 +3093,30 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] name = "substrait" -version = "0.36.0" +version = "0.41.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1ee6e584c8bf37104b7eb51c25eae07a9321b0e01379bec3b7c462d2f42afbf" +checksum = "2a3bf05f1d7a3fd7a97790d410f6e859b3a98dcde05e7a3fc00b31b0f60fe7cb" dependencies = [ "heck 0.5.0", "pbjson", "pbjson-build", "pbjson-types", "prettyplease", - "prost", + "prost 0.13.2", "prost-build", - "prost-types", + "prost-types 0.13.2", "protobuf-src", "schemars", "semver", "serde", "serde_json", "serde_yaml", - "syn 2.0.72", + "syn 2.0.77", "typify", "walkdir", ] @@ -3039,9 +3140,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.72" +version = "2.0.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" dependencies = [ "proc-macro2", "quote", @@ -3053,6 +3154,9 @@ name = "sync_wrapper" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] [[package]] name = "target-lexicon" @@ -3062,15 +3166,15 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.11.0" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fcd239983515c23a32fb82099f97d0b11b8c72f654ed659363a95c3dad7a53" +checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" dependencies = [ "cfg-if", "fastrand", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3090,7 +3194,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -3130,9 +3234,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.39.2" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daa4fb1bc778bd6f04cbfc4bb2d06a7396a8f299dc33ea1900cedaa316f467b1" +checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" dependencies = [ "backtrace", "bytes", @@ -3152,7 +3256,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -3168,9 +3272,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.11" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ "bytes", "futures-core", @@ -3196,15 +3300,15 @@ dependencies = [ [[package]] name = "tower-layer" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" @@ -3225,7 +3329,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -3270,7 +3374,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -3304,7 +3408,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.72", + "syn 2.0.77", "thiserror", "unicode-ident", ] @@ -3322,7 +3426,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.72", + "syn 2.0.77", "typify-impl", ] @@ -3431,34 +3535,35 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" dependencies = [ "cfg-if", "js-sys", @@ -3468,9 +3573,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3478,22 +3583,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" [[package]] name = "wasm-streams" @@ -3510,9 +3615,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" dependencies = [ "js-sys", "wasm-bindgen", @@ -3533,49 +3638,55 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.6", + "windows-targets", ] [[package]] -name = "windows-sys" -version = "0.48.0" +name = "windows-registry" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" dependencies = [ - "windows-targets 0.48.5", + "windows-result", + "windows-strings", + "windows-targets", ] [[package]] -name = "windows-sys" -version = "0.52.0" +name = "windows-result" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" dependencies = [ - "windows-targets 0.52.6", + "windows-result", + "windows-targets", ] [[package]] name = "windows-sys" -version = "0.59.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.6", + "windows-targets", ] [[package]] -name = "windows-targets" -version = "0.48.5" +name = "windows-sys" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", + "windows-targets", ] [[package]] @@ -3584,46 +3695,28 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - "windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -3636,64 +3729,30 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "winreg" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - [[package]] name = "xz2" version = "0.1.7" @@ -3721,7 +3780,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -3769,9 +3828,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.12+zstd.1.5.6" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index 8635776e..3f6b8e5d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,8 +36,8 @@ substrait = ["dep:datafusion-substrait"] [dependencies] tokio = { version = "1.39", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" -pyo3 = { version = "0.21", features = ["extension-module", "abi3", "abi3-py38"] } -arrow = { version = "52", feature = ["pyarrow"] } +pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } +arrow = { version = "53", feature = ["pyarrow"] } datafusion = { version = "41.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } datafusion-substrait = { version = "41.0.0", optional = true } prost = "0.12" # keep in line with `datafusion-substrait` @@ -53,7 +53,7 @@ syn = "2.0.68" url = "2" [build-dependencies] -pyo3-build-config = "0.21" +pyo3-build-config = "0.22" [lib] name = "datafusion_python" @@ -63,3 +63,6 @@ crate-type = ["cdylib", "rlib"] lto = true codegen-units = 1 +[patch.crates-io] +datafusion = { path = "../../arrow-datafusion-main/datafusion/core" } +datafusion-substrait = { path = "../../arrow-datafusion-main/datafusion/substrait" } diff --git a/src/common/schema.rs b/src/common/schema.rs index 5806c90e..186191d6 100644 --- a/src/common/schema.rs +++ b/src/common/schema.rs @@ -163,35 +163,29 @@ impl TableSource for SqlTableSource { self.schema.clone() } - fn supports_filter_pushdown( - &self, - filter: &Expr, - ) -> datafusion::common::Result { - let filters = split_conjunction(filter); - if filters.iter().all(|f| is_supported_push_down_expr(f)) { - // Push down filters to the tablescan operation if all are supported - Ok(TableProviderFilterPushDown::Exact) - } else if filters.iter().any(|f| is_supported_push_down_expr(f)) { - // Partially apply the filter in the TableScan but retain - // the Filter operator in the plan as well - Ok(TableProviderFilterPushDown::Inexact) - } else { - Ok(TableProviderFilterPushDown::Unsupported) - } - } - fn table_type(&self) -> datafusion::logical_expr::TableType { datafusion::logical_expr::TableType::Base } - #[allow(deprecated)] fn supports_filters_pushdown( &self, filters: &[&Expr], ) -> datafusion::common::Result> { filters .iter() - .map(|f| self.supports_filter_pushdown(f)) + .map(|f| { + let filters = split_conjunction(f); + if filters.iter().all(|f| is_supported_push_down_expr(f)) { + // Push down filters to the tablescan operation if all are supported + Ok(TableProviderFilterPushDown::Exact) + } else if filters.iter().any(|f| is_supported_push_down_expr(f)) { + // Partially apply the filter in the TableScan but retain + // the Filter operator in the plan as well + Ok(TableProviderFilterPushDown::Inexact) + } else { + Ok(TableProviderFilterPushDown::Unsupported) + } + }) .collect() } From 9cea1fb51831bb8faf544f92f936b4de60d3e3e6 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Mon, 9 Sep 2024 16:03:09 -0700 Subject: [PATCH 02/26] update get_logical_plan signature --- src/common/schema.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/schema.rs b/src/common/schema.rs index 186191d6..3cf22282 100644 --- a/src/common/schema.rs +++ b/src/common/schema.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; +use std::{any::Any, borrow::Cow}; use datafusion::arrow::datatypes::SchemaRef; use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableSource}; @@ -189,7 +189,7 @@ impl TableSource for SqlTableSource { .collect() } - fn get_logical_plan(&self) -> Option<&datafusion::logical_expr::LogicalPlan> { + fn get_logical_plan(&self) -> Option> { None } } From 6fca28b9139fb614f771eff89f091a40adb5da5c Mon Sep 17 00:00:00 2001 From: Matt Green Date: Mon, 9 Sep 2024 16:31:58 -0700 Subject: [PATCH 03/26] remove row_number() function row_number was converted to a UDF in datafusion v42 https://github.com/apache/datafusion/pull/12030 This specific functionality needs to be added back in. --- src/functions.rs | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/functions.rs b/src/functions.rs index b9ca6301..fd31fde3 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -755,16 +755,6 @@ pub fn lag( add_builder_fns_to_window(window_fn, partition_by, order_by) } -#[pyfunction] -pub fn row_number( - partition_by: Option>, - order_by: Option>, -) -> PyResult { - let window_fn = window_function::row_number(); - - add_builder_fns_to_window(window_fn, partition_by, order_by) -} - #[pyfunction] pub fn rank(partition_by: Option>, order_by: Option>) -> PyResult { let window_fn = window_function::rank(); @@ -1002,7 +992,6 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { // Window Functions m.add_wrapped(wrap_pyfunction!(lead))?; m.add_wrapped(wrap_pyfunction!(lag))?; - m.add_wrapped(wrap_pyfunction!(row_number))?; m.add_wrapped(wrap_pyfunction!(rank))?; m.add_wrapped(wrap_pyfunction!(dense_rank))?; m.add_wrapped(wrap_pyfunction!(percent_rank))?; From f2b3d3b69898c44ee54f7c1f663603c48505b508 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Mon, 9 Sep 2024 16:43:04 -0700 Subject: [PATCH 04/26] remove unneeded dependency --- src/udf.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/udf.rs b/src/udf.rs index 8bd9021d..1530036c 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -21,7 +21,7 @@ use pyo3::{prelude::*, types::PyTuple}; use datafusion::arrow::array::{make_array, Array, ArrayData, ArrayRef}; use datafusion::arrow::datatypes::DataType; -use datafusion::arrow::pyarrow::{FromPyArrow, PyArrowType, ToPyArrow}; +use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; use datafusion::error::DataFusionError; use datafusion::logical_expr::create_udf; use datafusion::logical_expr::function::ScalarFunctionImplementation; From 4b45a4b6365a1e61eb317623f07aaf0c1684b7b5 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Mon, 9 Sep 2024 17:04:19 -0700 Subject: [PATCH 05/26] fix pyo3 warnings Implicit defaults for trailing optional arguments have been deprecated in pyo3 v0.22.0 https://github.com/PyO3/pyo3/pull/4078 --- src/common/data_type.rs | 8 ++++---- src/common/schema.rs | 1 + src/context.rs | 8 ++++++++ src/dataframe.rs | 1 + src/functions.rs | 20 ++++++++++++++++++++ 5 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/common/data_type.rs b/src/common/data_type.rs index a29d1799..7f9c75bf 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -24,7 +24,7 @@ use pyo3::{exceptions::PyValueError, prelude::*}; use crate::errors::py_datafusion_err; #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass(name = "RexType", module = "datafusion.common")] +#[pyclass(eq, eq_int, name = "RexType", module = "datafusion.common")] pub enum RexType { Alias, Literal, @@ -692,7 +692,7 @@ impl From for PyDataType { /// Represents the possible Python types that can be mapped to the SQL types #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass(name = "PythonType", module = "datafusion.common")] +#[pyclass(eq, eq_int, name = "PythonType", module = "datafusion.common")] pub enum PythonType { Array, Bool, @@ -712,7 +712,7 @@ pub enum PythonType { #[allow(non_camel_case_types)] #[allow(clippy::upper_case_acronyms)] #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass(name = "SqlType", module = "datafusion.common")] +#[pyclass(eq, eq_int, name = "SqlType", module = "datafusion.common")] pub enum SqlType { ANY, ARRAY, @@ -770,7 +770,7 @@ pub enum SqlType { #[allow(non_camel_case_types)] #[allow(clippy::upper_case_acronyms)] #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass(name = "NullTreatment", module = "datafusion.common")] +#[pyclass(eq, eq_int, name = "NullTreatment", module = "datafusion.common")] pub enum NullTreatment { IGNORE_NULLS, RESPECT_NULLS, diff --git a/src/common/schema.rs b/src/common/schema.rs index 3cf22282..66ce925a 100644 --- a/src/common/schema.rs +++ b/src/common/schema.rs @@ -62,6 +62,7 @@ pub struct SqlTable { #[pymethods] impl SqlTable { #[new] + #[pyo3(signature = (table_name, columns, row_count, filepaths=None))] pub fn new( table_name: String, columns: Vec<(String, DataTypeMap)>, diff --git a/src/context.rs b/src/context.rs index 11b9fed5..b7d90ab7 100644 --- a/src/context.rs +++ b/src/context.rs @@ -294,6 +294,7 @@ impl PySessionContext { } /// Register an object store with the given name + #[pyo3(signature = (scheme, store, host=None))] pub fn register_object_store( &mut self, scheme: &str, @@ -374,6 +375,7 @@ impl PySessionContext { Ok(PyDataFrame::new(df)) } + #[pyo3(signature = (query, options=None))] pub fn sql_with_options( &mut self, query: &str, @@ -390,6 +392,7 @@ impl PySessionContext { Ok(PyDataFrame::new(df)) } + #[pyo3(signature = (partitions, name=None, schema=None))] pub fn create_dataframe( &mut self, partitions: PyArrowType>>, @@ -433,6 +436,7 @@ impl PySessionContext { } /// Construct datafusion dataframe from Python list + #[pyo3(signature = (data, name=None))] pub fn from_pylist( &mut self, data: Bound<'_, PyList>, @@ -452,6 +456,7 @@ impl PySessionContext { } /// Construct datafusion dataframe from Python dictionary + #[pyo3(signature = (data, name=None))] pub fn from_pydict( &mut self, data: Bound<'_, PyDict>, @@ -471,6 +476,7 @@ impl PySessionContext { } /// Construct datafusion dataframe from Arrow Table + #[pyo3(signature = (data, name=None))] pub fn from_arrow( &mut self, data: Bound<'_, PyAny>, @@ -506,6 +512,7 @@ impl PySessionContext { /// Construct datafusion dataframe from pandas #[allow(clippy::wrong_self_convention)] + #[pyo3(signature = (data, name=None))] pub fn from_pandas( &mut self, data: Bound<'_, PyAny>, @@ -525,6 +532,7 @@ impl PySessionContext { } /// Construct datafusion dataframe from polars + #[pyo3(signature = (data, name=None))] pub fn from_polars( &mut self, data: Bound<'_, PyAny>, diff --git a/src/dataframe.rs b/src/dataframe.rs index f33622cc..09be1e5c 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -504,6 +504,7 @@ impl PyDataFrame { Ok(table) } + #[pyo3(signature = (requested_schema=None))] fn __arrow_c_stream__<'py>( &'py mut self, py: Python<'py>, diff --git a/src/functions.rs b/src/functions.rs index fd31fde3..a3591190 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -96,6 +96,7 @@ fn array_cat(exprs: Vec) -> PyExpr { } #[pyfunction] +#[pyo3(signature = (array, element, index=None))] fn array_position(array: PyExpr, element: PyExpr, index: Option) -> PyExpr { let index = ScalarValue::Int64(index); let index = Expr::Literal(index); @@ -104,6 +105,7 @@ fn array_position(array: PyExpr, element: PyExpr, index: Option) -> PyExpr } #[pyfunction] +#[pyo3(signature = (array, begin, end, stride=None))] fn array_slice(array: PyExpr, begin: PyExpr, end: PyExpr, stride: Option) -> PyExpr { datafusion::functions_nested::expr_fn::array_slice( array.into(), @@ -142,16 +144,19 @@ fn concat_ws(sep: String, args: Vec) -> PyResult { } #[pyfunction] +#[pyo3(signature = (values, regex, flags=None))] fn regexp_like(values: PyExpr, regex: PyExpr, flags: Option) -> PyResult { Ok(functions::expr_fn::regexp_like(values.expr, regex.expr, flags.map(|x| x.expr)).into()) } #[pyfunction] +#[pyo3(signature = (values, regex, flags=None))] fn regexp_match(values: PyExpr, regex: PyExpr, flags: Option) -> PyResult { Ok(functions::expr_fn::regexp_match(values.expr, regex.expr, flags.map(|x| x.expr)).into()) } #[pyfunction] +#[pyo3(signature = (string, pattern, replacement, flags=None))] /// Replaces substring(s) matching a POSIX regular expression. fn regexp_replace( string: PyExpr, @@ -283,6 +288,7 @@ fn find_window_fn(name: &str, ctx: Option) -> PyResult, @@ -331,6 +337,7 @@ macro_rules! aggregate_function { }; ($NAME: ident, $($arg:ident)*) => { #[pyfunction] + #[pyo3(signature = ($($arg),*, distinct=None, filter=None, order_by=None, null_treatment=None))] fn $NAME( $($arg: PyExpr),*, distinct: Option, @@ -351,6 +358,7 @@ macro_rules! aggregate_function_vec_args { }; ($NAME: ident, $($arg:ident)*) => { #[pyfunction] + #[pyo3(signature = ($($arg),*, distinct=None, filter=None, order_by=None, null_treatment=None))] fn $NAME( $($arg: PyExpr),*, distinct: Option, @@ -624,6 +632,7 @@ aggregate_function!(approx_median); // aggregate_function!(grouping); #[pyfunction] +#[pyo3(signature = (expression, percentile, num_centroids=None, filter=None))] pub fn approx_percentile_cont( expression: PyExpr, percentile: f64, @@ -642,6 +651,7 @@ pub fn approx_percentile_cont( } #[pyfunction] +#[pyo3(signature = (expression, weight, percentile, filter=None))] pub fn approx_percentile_cont_with_weight( expression: PyExpr, weight: PyExpr, @@ -662,6 +672,7 @@ aggregate_function_vec_args!(last_value); // We handle first_value explicitly because the signature expects an order_by // https://github.com/apache/datafusion/issues/12376 #[pyfunction] +#[pyo3(signature = (expr, distinct=None, filter=None, order_by=None, null_treatment=None))] pub fn first_value( expr: PyExpr, distinct: Option, @@ -677,6 +688,7 @@ pub fn first_value( // nth_value requires a non-expr argument #[pyfunction] +#[pyo3(signature = (expr, n, distinct=None, filter=None, order_by=None, null_treatment=None))] pub fn nth_value( expr: PyExpr, n: i64, @@ -691,6 +703,7 @@ pub fn nth_value( // string_agg requires a non-expr argument #[pyfunction] +#[pyo3(signature = (expr, delimiter, distinct=None, filter=None, order_by=None, null_treatment=None))] pub fn string_agg( expr: PyExpr, delimiter: String, @@ -730,6 +743,7 @@ fn add_builder_fns_to_window( } #[pyfunction] +#[pyo3(signature = (arg, shift_offset, default_value=None, partition_by=None, order_by=None))] pub fn lead( arg: PyExpr, shift_offset: i64, @@ -743,6 +757,7 @@ pub fn lead( } #[pyfunction] +#[pyo3(signature = (arg, shift_offset, default_value=None, partition_by=None, order_by=None))] pub fn lag( arg: PyExpr, shift_offset: i64, @@ -756,6 +771,7 @@ pub fn lag( } #[pyfunction] +#[pyo3(signature = (partition_by=None, order_by=None))] pub fn rank(partition_by: Option>, order_by: Option>) -> PyResult { let window_fn = window_function::rank(); @@ -763,6 +779,7 @@ pub fn rank(partition_by: Option>, order_by: Option>) -> } #[pyfunction] +#[pyo3(signature = (partition_by=None, order_by=None))] pub fn dense_rank( partition_by: Option>, order_by: Option>, @@ -773,6 +790,7 @@ pub fn dense_rank( } #[pyfunction] +#[pyo3(signature = (partition_by=None, order_by=None))] pub fn percent_rank( partition_by: Option>, order_by: Option>, @@ -783,6 +801,7 @@ pub fn percent_rank( } #[pyfunction] +#[pyo3(signature = (partition_by=None, order_by=None))] pub fn cume_dist( partition_by: Option>, order_by: Option>, @@ -793,6 +812,7 @@ pub fn cume_dist( } #[pyfunction] +#[pyo3(signature = (arg, partition_by=None, order_by=None))] pub fn ntile( arg: PyExpr, partition_by: Option>, From 6353aa9e6cb296b67945014b465609d1d7099f18 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Mon, 9 Sep 2024 21:28:28 -0700 Subject: [PATCH 06/26] update object_store dependency --- Cargo.lock | 138 ++++++++++++++----------------------------------- Cargo.toml | 2 +- src/context.rs | 2 +- 3 files changed, 41 insertions(+), 101 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bc6c9a5a..9b698997 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -375,7 +375,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -386,7 +386,7 @@ checksum = "a27b8a3a6e1a44fa4c8baf1f653e4172e81486d4941f2237e20dc2d0cf4ddff1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -785,7 +785,7 @@ dependencies = [ "log", "num-traits", "num_cpus", - "object_store 0.11.0", + "object_store", "parking_lot", "parquet", "paste", @@ -830,7 +830,7 @@ dependencies = [ "instant", "libc", "num_cpus", - "object_store 0.11.0", + "object_store", "parquet", "paste", "pyo3", @@ -858,7 +858,7 @@ dependencies = [ "futures", "hashbrown", "log", - "object_store 0.11.0", + "object_store", "parking_lot", "rand", "tempfile", @@ -1096,7 +1096,7 @@ dependencies = [ "datafusion-substrait", "futures", "mimalloc", - "object_store 0.10.2", + "object_store", "parking_lot", "prost 0.12.6", "prost-types 0.12.6", @@ -1104,7 +1104,7 @@ dependencies = [ "pyo3-build-config", "rand", "regex-syntax", - "syn 2.0.77", + "syn", "tokio", "url", "uuid", @@ -1134,7 +1134,7 @@ dependencies = [ "chrono", "datafusion", "itertools 0.13.0", - "object_store 0.11.0", + "object_store", "pbjson-types", "prost 0.13.2", "substrait", @@ -1152,12 +1152,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "doc-comment" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" - [[package]] name = "dyn-clone" version = "1.0.17" @@ -1289,7 +1283,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -1958,9 +1952,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.10.2" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6da452820c715ce78221e8202ccc599b4a52f3e1eb3eedb487b680c81a8e3f3" +checksum = "25a0c4b3a0e31f8b66f71ad8064521efa773910196e2cde791436f13409f3b45" dependencies = [ "async-trait", "base64 0.22.1", @@ -1980,28 +1974,7 @@ dependencies = [ "rustls-pemfile", "serde", "serde_json", - "snafu 0.7.5", - "tokio", - "tracing", - "url", - "walkdir", -] - -[[package]] -name = "object_store" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25a0c4b3a0e31f8b66f71ad8064521efa773910196e2cde791436f13409f3b45" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures", - "humantime", - "itertools 0.13.0", - "parking_lot", - "percent-encoding", - "snafu 0.8.4", + "snafu", "tokio", "tracing", "url", @@ -2077,7 +2050,7 @@ dependencies = [ "lz4_flex", "num", "num-bigint", - "object_store 0.11.0", + "object_store", "paste", "seq-macro", "snap", @@ -2211,7 +2184,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -2254,7 +2227,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" dependencies = [ "proc-macro2", - "syn 2.0.77", + "syn", ] [[package]] @@ -2303,7 +2276,7 @@ dependencies = [ "prost 0.13.2", "prost-types 0.13.2", "regex", - "syn 2.0.77", + "syn", "tempfile", ] @@ -2317,7 +2290,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -2330,7 +2303,7 @@ dependencies = [ "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -2407,7 +2380,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -2420,7 +2393,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -2798,7 +2771,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.77", + "syn", ] [[package]] @@ -2862,7 +2835,7 @@ checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -2873,7 +2846,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -2897,7 +2870,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.77", + "syn", ] [[package]] @@ -2963,35 +2936,13 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" -[[package]] -name = "snafu" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" -dependencies = [ - "doc-comment", - "snafu-derive 0.7.5", -] - [[package]] name = "snafu" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b835cb902660db3415a672d862905e791e54d306c6e8189168c7f3d9ae1c79d" dependencies = [ - "snafu-derive 0.8.4", -] - -[[package]] -name = "snafu-derive" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "quote", - "syn 1.0.109", + "snafu-derive", ] [[package]] @@ -3003,7 +2954,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -3046,7 +2997,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -3080,7 +3031,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.77", + "syn", ] [[package]] @@ -3093,7 +3044,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.77", + "syn", ] [[package]] @@ -3116,7 +3067,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.77", + "syn", "typify", "walkdir", ] @@ -3127,17 +3078,6 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - [[package]] name = "syn" version = "2.0.77" @@ -3194,7 +3134,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -3256,7 +3196,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -3329,7 +3269,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -3374,7 +3314,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -3408,7 +3348,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.77", + "syn", "thiserror", "unicode-ident", ] @@ -3426,7 +3366,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.77", + "syn", "typify-impl", ] @@ -3555,7 +3495,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.77", + "syn", "wasm-bindgen-shared", ] @@ -3589,7 +3529,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3780,7 +3720,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 3f6b8e5d..126aeb7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,7 +46,7 @@ uuid = { version = "1.9", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } async-trait = "0.1" futures = "0.3" -object_store = { version = "0.10.1", features = ["aws", "gcp", "azure"] } +object_store = { version = "0.11.0", features = ["aws", "gcp", "azure"] } parking_lot = "0.12" regex-syntax = "0.8" syn = "2.0.68" diff --git a/src/context.rs b/src/context.rs index b7d90ab7..fa896e75 100644 --- a/src/context.rs +++ b/src/context.rs @@ -86,7 +86,7 @@ impl PySessionConfig { let mut config = SessionConfig::new(); if let Some(hash_map) = config_options { for (k, v) in &hash_map { - config = config.set(k, ScalarValue::Utf8(Some(v.clone()))); + config = config.set(k, &ScalarValue::Utf8(Some(v.clone()))); } } From 815b6d74f27e25366ef2384aab179b04fb152817 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Mon, 9 Sep 2024 21:50:00 -0700 Subject: [PATCH 07/26] change PyExpr -> PySortExpr --- src/context.rs | 8 ++--- src/dataframe.rs | 6 ++-- src/expr.rs | 17 +++------- src/expr/sort_expr.rs | 9 ++++- src/functions.rs | 78 +++++++++++++++++++++++-------------------- 5 files changed, 60 insertions(+), 58 deletions(-) diff --git a/src/context.rs b/src/context.rs index fa896e75..79db2e65 100644 --- a/src/context.rs +++ b/src/context.rs @@ -35,7 +35,7 @@ use crate::catalog::{PyCatalog, PyTable}; use crate::dataframe::PyDataFrame; use crate::dataset::Dataset; use crate::errors::{py_datafusion_err, DataFusionError}; -use crate::expr::PyExpr; +use crate::expr::sort_expr::PySortExpr; use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; use crate::sql::logical::PyLogicalPlan; @@ -333,7 +333,7 @@ impl PySessionContext { table_partition_cols: Vec<(String, String)>, file_extension: &str, schema: Option>, - file_sort_order: Option>>, + file_sort_order: Option>>, py: Python, ) -> PyResult<()> { let options = ListingOptions::new(Arc::new(ParquetFormat::new())) @@ -589,7 +589,7 @@ impl PySessionContext { file_extension: &str, skip_metadata: bool, schema: Option>, - file_sort_order: Option>>, + file_sort_order: Option>>, py: Python, ) -> PyResult<()> { let mut options = ParquetReadOptions::default() @@ -890,7 +890,7 @@ impl PySessionContext { file_extension: &str, skip_metadata: bool, schema: Option>, - file_sort_order: Option>>, + file_sort_order: Option>>, py: Python, ) -> PyResult { let mut options = ParquetReadOptions::default() diff --git a/src/dataframe.rs b/src/dataframe.rs index 09be1e5c..80b5582b 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -40,12 +40,12 @@ use pyo3::types::{PyCapsule, PyTuple}; use tokio::task::JoinHandle; use crate::errors::py_datafusion_err; -use crate::expr::to_sort_expressions; +use crate::expr::sort_expr::to_sort_expressions; use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; use crate::sql::logical::PyLogicalPlan; use crate::utils::{get_tokio_runtime, wait_for_future}; -use crate::{errors::DataFusionError, expr::PyExpr}; +use crate::{errors::DataFusionError, expr::{PyExpr, sort_expr::PySortExpr}}; /// A PyDataFrame is a representation of a logical plan and an API to compose statements. /// Use it to build a plan and `.collect()` to execute the plan and collect the result. @@ -196,7 +196,7 @@ impl PyDataFrame { } #[pyo3(signature = (*exprs))] - fn sort(&self, exprs: Vec) -> PyResult { + fn sort(&self, exprs: Vec) -> PyResult { let exprs = to_sort_expressions(exprs); let df = self.df.as_ref().clone().sort(exprs)?; Ok(Self::new(df)) diff --git a/src/expr.rs b/src/expr.rs index ab16f287..823b1143 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -94,6 +94,8 @@ pub mod unnest; pub mod unnest_expr; pub mod window; +use sort_expr::{PySortExpr, to_sort_expressions}; + /// A PyExpr that can be used on a DataFrame #[pyclass(name = "Expr", module = "datafusion.expr", subclass)] #[derive(Debug, Clone)] @@ -518,7 +520,7 @@ impl PyExpr { // Expression Function Builder functions - pub fn order_by(&self, order_by: Vec) -> PyExprFuncBuilder { + pub fn order_by(&self, order_by: Vec) -> PyExprFuncBuilder { self.expr .clone() .order_by(to_sort_expressions(order_by)) @@ -562,20 +564,9 @@ impl From for PyExprFuncBuilder { } } -pub fn to_sort_expressions(order_by: Vec) -> Vec { - order_by - .iter() - .map(|e| e.expr.clone()) - .map(|e| match e { - Expr::Sort(_) => e, - _ => e.sort(true, true), - }) - .collect() -} - #[pymethods] impl PyExprFuncBuilder { - pub fn order_by(&self, order_by: Vec) -> PyExprFuncBuilder { + pub fn order_by(&self, order_by: Vec) -> PyExprFuncBuilder { self.builder .clone() .order_by(to_sort_expressions(order_by)) diff --git a/src/expr/sort_expr.rs b/src/expr/sort_expr.rs index 4299d1f7..55a05922 100644 --- a/src/expr/sort_expr.rs +++ b/src/expr/sort_expr.rs @@ -51,10 +51,17 @@ impl Display for PySortExpr { } } +pub fn to_sort_expressions(order_by: Vec) -> Vec { + order_by + .iter() + .map(|e| e.sort.clone()) + .collect() +} + #[pymethods] impl PySortExpr { fn expr(&self) -> PyResult { - Ok((*self.sort.expr).clone().into()) + Ok(self.sort.expr.clone().into()) } fn ascending(&self) -> PyResult { diff --git a/src/functions.rs b/src/functions.rs index a3591190..7765f522 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -25,7 +25,8 @@ use crate::common::data_type::NullTreatment; use crate::context::PySessionContext; use crate::errors::DataFusionError; use crate::expr::conditional_expr::PyCaseBuilder; -use crate::expr::to_sort_expressions; +use crate::expr::sort_expr::to_sort_expressions; +use crate::expr::sort_expr::PySortExpr; use crate::expr::window::PyWindowFrame; use crate::expr::PyExpr; use datafusion::common::{Column, ScalarValue, TableReference}; @@ -35,7 +36,7 @@ use datafusion::functions_aggregate; use datafusion::logical_expr::expr::Alias; use datafusion::logical_expr::sqlparser::ast::NullTreatment as DFNullTreatment; use datafusion::logical_expr::{ - expr::{find_df_window_func, Sort, WindowFunction}, + expr::{find_df_window_func, WindowFunction}, lit, Expr, WindowFunctionDefinition, }; @@ -43,7 +44,7 @@ fn add_builder_fns_to_aggregate( agg_fn: Expr, distinct: Option, filter: Option, - order_by: Option>, + order_by: Option>, null_treatment: Option, ) -> PyResult { // Since ExprFuncBuilder::new() is private, we can guarantee initializing @@ -174,14 +175,16 @@ fn regexp_replace( } /// Creates a new Sort Expr #[pyfunction] -fn order_by(expr: PyExpr, asc: bool, nulls_first: bool) -> PyResult { - Ok(PyExpr { - expr: datafusion::logical_expr::Expr::Sort(Sort { - expr: Box::new(expr.expr), - asc, - nulls_first, - }), - }) +fn order_by(expr: PyExpr, asc: bool, nulls_first: bool) -> PyResult { + Ok( + PySortExpr::from( + datafusion::logical_expr::expr::Sort { + expr: expr.expr, + asc, + nulls_first, + } + ) + ) } /// Creates a new Alias Expr @@ -342,7 +345,7 @@ macro_rules! aggregate_function { $($arg: PyExpr),*, distinct: Option, filter: Option, - order_by: Option>, + order_by: Option>, null_treatment: Option ) -> PyResult { let agg_fn = functions_aggregate::expr_fn::$NAME($($arg.into()),*); @@ -363,7 +366,7 @@ macro_rules! aggregate_function_vec_args { $($arg: PyExpr),*, distinct: Option, filter: Option, - order_by: Option>, + order_by: Option>, null_treatment: Option ) -> PyResult { let agg_fn = functions_aggregate::expr_fn::$NAME(vec![$($arg.into()),*]); @@ -677,7 +680,7 @@ pub fn first_value( expr: PyExpr, distinct: Option, filter: Option, - order_by: Option>, + order_by: Option>, null_treatment: Option, ) -> PyResult { // If we initialize the UDAF with order_by directly, then it gets over-written by the builder @@ -687,19 +690,20 @@ pub fn first_value( } // nth_value requires a non-expr argument -#[pyfunction] -#[pyo3(signature = (expr, n, distinct=None, filter=None, order_by=None, null_treatment=None))] -pub fn nth_value( - expr: PyExpr, - n: i64, - distinct: Option, - filter: Option, - order_by: Option>, - null_treatment: Option, -) -> PyResult { - let agg_fn = datafusion::functions_aggregate::nth_value::nth_value(vec![expr.expr, lit(n)]); - add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) -} +// #[pyfunction] +// #[pyo3(signature = (expr, n, distinct=None, filter=None, order_by=None, null_treatment=None))] +// pub fn nth_value( +// expr: PyExpr, +// n: i64, +// distinct: Option, +// filter: Option, +// order_by: Option>, +// null_treatment: Option, +// ) -> PyResult { +// // @todo: Commenting this function out for now as it requires some reworking +// let agg_fn = datafusion::functions_aggregate::nth_value::nth_value(vec![expr.expr, lit(n)]); +// add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) +// } // string_agg requires a non-expr argument #[pyfunction] @@ -709,7 +713,7 @@ pub fn string_agg( delimiter: String, distinct: Option, filter: Option, - order_by: Option>, + order_by: Option>, null_treatment: Option, ) -> PyResult { let agg_fn = datafusion::functions_aggregate::string_agg::string_agg(expr.expr, lit(delimiter)); @@ -719,7 +723,7 @@ pub fn string_agg( fn add_builder_fns_to_window( window_fn: Expr, partition_by: Option>, - order_by: Option>, + order_by: Option>, ) -> PyResult { // Since ExprFuncBuilder::new() is private, set an empty partition and then // override later if appropriate. @@ -749,7 +753,7 @@ pub fn lead( shift_offset: i64, default_value: Option, partition_by: Option>, - order_by: Option>, + order_by: Option>, ) -> PyResult { let window_fn = window_function::lead(arg.expr, Some(shift_offset), default_value); @@ -763,7 +767,7 @@ pub fn lag( shift_offset: i64, default_value: Option, partition_by: Option>, - order_by: Option>, + order_by: Option>, ) -> PyResult { let window_fn = window_function::lag(arg.expr, Some(shift_offset), default_value); @@ -772,7 +776,7 @@ pub fn lag( #[pyfunction] #[pyo3(signature = (partition_by=None, order_by=None))] -pub fn rank(partition_by: Option>, order_by: Option>) -> PyResult { +pub fn rank(partition_by: Option>, order_by: Option>) -> PyResult { let window_fn = window_function::rank(); add_builder_fns_to_window(window_fn, partition_by, order_by) @@ -782,7 +786,7 @@ pub fn rank(partition_by: Option>, order_by: Option>) -> #[pyo3(signature = (partition_by=None, order_by=None))] pub fn dense_rank( partition_by: Option>, - order_by: Option>, + order_by: Option>, ) -> PyResult { let window_fn = window_function::dense_rank(); @@ -793,7 +797,7 @@ pub fn dense_rank( #[pyo3(signature = (partition_by=None, order_by=None))] pub fn percent_rank( partition_by: Option>, - order_by: Option>, + order_by: Option>, ) -> PyResult { let window_fn = window_function::percent_rank(); @@ -804,7 +808,7 @@ pub fn percent_rank( #[pyo3(signature = (partition_by=None, order_by=None))] pub fn cume_dist( partition_by: Option>, - order_by: Option>, + order_by: Option>, ) -> PyResult { let window_fn = window_function::cume_dist(); @@ -816,7 +820,7 @@ pub fn cume_dist( pub fn ntile( arg: PyExpr, partition_by: Option>, - order_by: Option>, + order_by: Option>, ) -> PyResult { let window_fn = window_function::ntile(arg.into()); @@ -965,7 +969,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(regr_syy))?; m.add_wrapped(wrap_pyfunction!(first_value))?; m.add_wrapped(wrap_pyfunction!(last_value))?; - m.add_wrapped(wrap_pyfunction!(nth_value))?; + // m.add_wrapped(wrap_pyfunction!(nth_value))?; m.add_wrapped(wrap_pyfunction!(bit_and))?; m.add_wrapped(wrap_pyfunction!(bit_or))?; m.add_wrapped(wrap_pyfunction!(bit_xor))?; From 92806a8ee03fbba11f88ab6d3a0b0a7684addd75 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Mon, 9 Sep 2024 23:13:52 -0700 Subject: [PATCH 08/26] comment out key.extract::<&PyTuple>() condition statement --- src/dataframe.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/dataframe.rs b/src/dataframe.rs index 80b5582b..07c5f7f3 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -70,13 +70,14 @@ impl PyDataFrame { if let Ok(key) = key.extract::() { // df[col] self.select_columns(vec![key]) - } else if let Ok(tuple) = key.extract::<&PyTuple>() { - // df[col1, col2, col3] - let keys = tuple - .iter() - .map(|item| item.extract::()) - .collect::>>()?; - self.select_columns(keys) + // } else if let Ok(tuple) = key.extract::<&PyTuple>() { + // @todo: make this branch work + // // df[col1, col2, col3] + // let keys = tuple + // .iter() + // .map(|item| item.extract::()) + // .collect::>>()?; + // self.select_columns(keys) } else if let Ok(keys) = key.extract::>() { // df[[col1, col2, col3]] self.select_columns(keys) From e2fa24effa9058df64eccbf03e88d1326a1bf607 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Mon, 9 Sep 2024 23:48:24 -0700 Subject: [PATCH 09/26] change more instances of PyExpr > PySortExpr --- src/expr/sort.rs | 6 +++--- src/expr/sort_expr.rs | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/expr/sort.rs b/src/expr/sort.rs index b31ebfe0..a1803cca 100644 --- a/src/expr/sort.rs +++ b/src/expr/sort.rs @@ -22,7 +22,7 @@ use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; use crate::expr::logical_node::LogicalNode; -use crate::expr::PyExpr; +use crate::expr::sort_expr::PySortExpr; use crate::sql::logical::PyLogicalPlan; #[pyclass(name = "Sort", module = "datafusion.expr", subclass)] @@ -63,12 +63,12 @@ impl Display for PySort { #[pymethods] impl PySort { /// Retrieves the sort expressions for this `Sort` - fn sort_exprs(&self) -> PyResult> { + fn sort_exprs(&self) -> PyResult> { Ok(self .sort .expr .iter() - .map(|e| PyExpr::from(e.clone())) + .map(|e| PySortExpr::from(e.clone())) .collect()) } diff --git a/src/expr/sort_expr.rs b/src/expr/sort_expr.rs index 55a05922..34ed91f6 100644 --- a/src/expr/sort_expr.rs +++ b/src/expr/sort_expr.rs @@ -58,6 +58,10 @@ pub fn to_sort_expressions(order_by: Vec) -> Vec { .collect() } +pub fn py_sort_expr_list(expr: &[SortExpr]) -> PyResult> { + Ok(expr.iter().map(|e| PySortExpr::from(e.clone())).collect()) +} + #[pymethods] impl PySortExpr { fn expr(&self) -> PyResult { From 21013a7dbd23cc18a4876724a8b784d77cd30eb8 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Mon, 9 Sep 2024 23:48:50 -0700 Subject: [PATCH 10/26] update function signatures to use _bound versions --- src/expr/window.rs | 5 +++-- src/udf.rs | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/expr/window.rs b/src/expr/window.rs index f17a6dd9..950db12a 100644 --- a/src/expr/window.rs +++ b/src/expr/window.rs @@ -24,6 +24,7 @@ use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; use crate::errors::py_type_err; use crate::expr::logical_node::LogicalNode; +use crate::expr::sort_expr::{py_sort_expr_list, PySortExpr}; use crate::expr::PyExpr; use crate::sql::logical::PyLogicalPlan; @@ -114,9 +115,9 @@ impl PyWindow { } /// Returns order by columns in a window function expression - pub fn get_sort_exprs(&self, expr: PyExpr) -> PyResult> { + pub fn get_sort_exprs(&self, expr: PyExpr) -> PyResult> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { order_by, .. }) => py_expr_list(&order_by), + Expr::WindowFunction(WindowFunction { order_by, .. }) => py_sort_expr_list(&order_by), other => Err(not_window_function_err(other)), } } diff --git a/src/udf.rs b/src/udf.rs index 1530036c..4d57f87b 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -20,6 +20,7 @@ use std::sync::Arc; use pyo3::{prelude::*, types::PyTuple}; use datafusion::arrow::array::{make_array, Array, ArrayData, ArrayRef}; +use datafusion::arrow::pyarrow::FromPyArrow; use datafusion::arrow::datatypes::DataType; use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; use datafusion::error::DataFusionError; @@ -43,16 +44,15 @@ fn to_rust_function(func: PyObject) -> ScalarFunctionImplementation { .iter() .map(|arg| arg.into_data().to_pyarrow(py).unwrap()) .collect::>(); - let py_args = PyTuple::new(py, py_args); + let py_args = PyTuple::new_bound(py, py_args); // 2. call function let value = func - .as_ref(py) - .call(py_args, None) + .call_bound(py, py_args, None) .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?; // 3. cast to arrow::array::Array - let array_data = ArrayData::from_pyarrow(value).unwrap(); + let array_data = ArrayData::from_pyarrow_bound(value.bind(py)).unwrap(); Ok(make_array(array_data)) }) }, From 142e4ed7f16dc449134a8172dd713f8facad098f Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 10 Sep 2024 11:56:17 -0700 Subject: [PATCH 11/26] remove clone --- src/dataset.rs | 2 +- src/dataset_exec.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dataset.rs b/src/dataset.rs index de7402fd..a8fa21ec 100644 --- a/src/dataset.rs +++ b/src/dataset.rs @@ -39,7 +39,7 @@ use crate::dataset_exec::DatasetExec; use crate::pyarrow_filter_expression::PyArrowFilterExpression; // Wraps a pyarrow.dataset.Dataset class and implements a Datafusion TableProvider around it -#[derive(Debug, Clone)] +#[derive(Debug)] pub(crate) struct Dataset { dataset: PyObject, } diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index a377e255..2759aa67 100644 --- a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -53,7 +53,7 @@ impl Iterator for PyArrowBatchesAdapter { fn next(&mut self) -> Option { Python::with_gil(|py| { - let mut batches = self.batches.clone().into_bound(py); + let mut batches = self.batches.clone_ref(py).into_bound(py); Some( batches .next()? @@ -65,7 +65,7 @@ impl Iterator for PyArrowBatchesAdapter { } // Wraps a pyarrow.dataset.Dataset class and implements a Datafusion ExecutionPlan around it -#[derive(Debug, Clone)] +#[derive(Debug)] pub(crate) struct DatasetExec { dataset: PyObject, schema: SchemaRef, From e971add4ea0ca44c6f9445fd98ceb318436080f0 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 10 Sep 2024 08:50:22 -0400 Subject: [PATCH 12/26] Working through some of the sort requirement changes --- Cargo.lock | 108 +++++++----------- Cargo.toml | 8 +- python/datafusion/context.py | 13 ++- python/datafusion/dataframe.py | 8 +- python/datafusion/expr.py | 52 ++++++++- python/datafusion/functions.py | 93 +++++++-------- python/datafusion/tests/test_sql.py | 11 +- .../datafusion/tests/test_wrapper_coverage.py | 5 +- src/dataframe.rs | 5 +- src/expr.rs | 18 +-- src/expr/sort_expr.rs | 16 ++- src/functions.rs | 69 ++++++----- src/pyarrow_filter_expression.rs | 2 +- src/udf.rs | 2 +- 14 files changed, 225 insertions(+), 185 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9b698997..bb1d800b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -747,6 +747,7 @@ dependencies = [ [[package]] name = "datafusion" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "ahash", "apache-avro", @@ -781,7 +782,7 @@ dependencies = [ "half", "hashbrown", "indexmap", - "itertools 0.13.0", + "itertools", "log", "num-traits", "num_cpus", @@ -804,6 +805,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "arrow-schema", "async-trait", @@ -817,6 +819,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "ahash", "apache-avro", @@ -841,6 +844,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "log", "tokio", @@ -849,6 +853,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "arrow", "chrono", @@ -868,6 +873,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "ahash", "arrow", @@ -888,6 +894,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "arrow", "datafusion-common", @@ -897,6 +904,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "arrow", "arrow-buffer", @@ -909,7 +917,7 @@ dependencies = [ "datafusion-expr", "hashbrown", "hex", - "itertools 0.13.0", + "itertools", "log", "md-5", "rand", @@ -922,6 +930,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "ahash", "arrow", @@ -941,6 +950,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "ahash", "arrow", @@ -953,6 +963,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "arrow", "arrow-array", @@ -965,7 +976,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-physical-expr-common", - "itertools 0.13.0", + "itertools", "log", "paste", "rand", @@ -974,6 +985,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "datafusion-common", "datafusion-expr", @@ -984,6 +996,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "arrow", "async-trait", @@ -993,7 +1006,7 @@ dependencies = [ "datafusion-physical-expr", "hashbrown", "indexmap", - "itertools 0.13.0", + "itertools", "log", "paste", "regex-syntax", @@ -1002,6 +1015,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "ahash", "arrow", @@ -1022,7 +1036,7 @@ dependencies = [ "hashbrown", "hex", "indexmap", - "itertools 0.13.0", + "itertools", "log", "paste", "petgraph", @@ -1032,6 +1046,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "ahash", "arrow", @@ -1044,18 +1059,20 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-physical-expr", "datafusion-physical-plan", - "itertools 0.13.0", + "itertools", ] [[package]] name = "datafusion-physical-plan" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "ahash", "arrow", @@ -1077,7 +1094,7 @@ dependencies = [ "half", "hashbrown", "indexmap", - "itertools 0.13.0", + "itertools", "log", "once_cell", "parking_lot", @@ -1098,8 +1115,8 @@ dependencies = [ "mimalloc", "object_store", "parking_lot", - "prost 0.12.6", - "prost-types 0.12.6", + "prost", + "prost-types", "pyo3", "pyo3-build-config", "rand", @@ -1113,6 +1130,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "arrow", "arrow-array", @@ -1128,15 +1146,16 @@ dependencies = [ [[package]] name = "datafusion-substrait" version = "41.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" dependencies = [ "arrow-buffer", "async-recursion", "chrono", "datafusion", - "itertools 0.13.0", + "itertools", "object_store", "pbjson-types", - "prost 0.13.2", + "prost", "substrait", "url", ] @@ -1590,15 +1609,6 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "187674a687eed5fe42285b40c6291f9a01517d415fad1c3cbc6a9f778af7fcd4" -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -1963,7 +1973,7 @@ dependencies = [ "futures", "humantime", "hyper", - "itertools 0.13.0", + "itertools", "md-5", "parking_lot", "percent-encoding", @@ -2093,9 +2103,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" dependencies = [ "heck 0.5.0", - "itertools 0.13.0", - "prost 0.13.2", - "prost-types 0.13.2", + "itertools", + "prost", + "prost-types", ] [[package]] @@ -2108,7 +2118,7 @@ dependencies = [ "chrono", "pbjson", "pbjson-build", - "prost 0.13.2", + "prost", "prost-build", "serde", ] @@ -2239,16 +2249,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "prost" -version = "0.12.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" -dependencies = [ - "bytes", - "prost-derive 0.12.6", -] - [[package]] name = "prost" version = "0.13.2" @@ -2256,7 +2256,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b2ecbe40f08db5c006b5764a2645f7f3f141ce756412ac9e1dd6087e6d32995" dependencies = [ "bytes", - "prost-derive 0.13.2", + "prost-derive", ] [[package]] @@ -2267,32 +2267,19 @@ checksum = "f8650aabb6c35b860610e9cff5dc1af886c9e25073b7b1712a68972af4281302" dependencies = [ "bytes", "heck 0.5.0", - "itertools 0.13.0", + "itertools", "log", "multimap", "once_cell", "petgraph", "prettyplease", - "prost 0.13.2", - "prost-types 0.13.2", + "prost", + "prost-types", "regex", "syn", "tempfile", ] -[[package]] -name = "prost-derive" -version = "0.12.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" -dependencies = [ - "anyhow", - "itertools 0.12.1", - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "prost-derive" version = "0.13.2" @@ -2300,28 +2287,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acf0c195eebb4af52c752bec4f52f645da98b6e92077a04110c7f349477ae5ac" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools", "proc-macro2", "quote", "syn", ] -[[package]] -name = "prost-types" -version = "0.12.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" -dependencies = [ - "prost 0.12.6", -] - [[package]] name = "prost-types" version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60caa6738c7369b940c3d49246a8d1749323674c65cb13010134f5c9bad5b519" dependencies = [ - "prost 0.13.2", + "prost", ] [[package]] @@ -3058,9 +3036,9 @@ dependencies = [ "pbjson-build", "pbjson-types", "prettyplease", - "prost 0.13.2", + "prost", "prost-build", - "prost-types 0.13.2", + "prost-types", "protobuf-src", "schemars", "semver", diff --git a/Cargo.toml b/Cargo.toml index 126aeb7c..a7fae57f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,8 +40,8 @@ pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] arrow = { version = "53", feature = ["pyarrow"] } datafusion = { version = "41.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } datafusion-substrait = { version = "41.0.0", optional = true } -prost = "0.12" # keep in line with `datafusion-substrait` -prost-types = "0.12" # keep in line with `datafusion-substrait` +prost = "0.13" # keep in line with `datafusion-substrait` +prost-types = "0.13" # keep in line with `datafusion-substrait` uuid = { version = "1.9", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } async-trait = "0.1" @@ -64,5 +64,5 @@ lto = true codegen-units = 1 [patch.crates-io] -datafusion = { path = "../../arrow-datafusion-main/datafusion/core" } -datafusion-substrait = { path = "../../arrow-datafusion-main/datafusion/substrait" } +datafusion = { git = "https://github.com/apache/datafusion.git", rev = "c71a9d7508e37e5d082e22d2953a12b61d290df5" } +datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "c71a9d7508e37e5d082e22d2953a12b61d290df5" } diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 903d4a10..35a40ccd 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -28,7 +28,7 @@ from datafusion._internal import AggregateUDF from datafusion.catalog import Catalog, Table from datafusion.dataframe import DataFrame -from datafusion.expr import Expr +from datafusion.expr import Expr, SortExpr, sort_list_to_raw_sort_list from datafusion.record_batch import RecordBatchStream from datafusion.udf import ScalarUDF @@ -466,7 +466,7 @@ def register_listing_table( table_partition_cols: list[tuple[str, str]] | None = None, file_extension: str = ".parquet", schema: pyarrow.Schema | None = None, - file_sort_order: list[list[Expr]] | None = None, + file_sort_order: list[list[Expr | SortExpr]] | None = None, ) -> None: """Register multiple files as a single table. @@ -484,15 +484,18 @@ def register_listing_table( """ if table_partition_cols is None: table_partition_cols = [] - if file_sort_order is not None: - file_sort_order = [[x.expr for x in xs] for xs in file_sort_order] + file_sort_order_raw = ( + [sort_list_to_raw_sort_list(f) for f in file_sort_order] + if file_sort_order is not None + else None + ) self.ctx.register_listing_table( name, str(path), table_partition_cols, file_extension, schema, - file_sort_order, + file_sort_order_raw, ) def sql(self, query: str, options: SQLOptions | None = None) -> DataFrame: diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 56dff22a..2328ef8f 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -33,7 +33,7 @@ from typing import Callable from datafusion._internal import DataFrame as DataFrameInternal -from datafusion.expr import Expr +from datafusion.expr import Expr, SortExpr, sort_or_default from datafusion._internal import ( LogicalPlan, ExecutionPlan, @@ -199,7 +199,7 @@ def aggregate( aggs = [e.expr for e in aggs] return DataFrame(self.df.aggregate(group_by, aggs)) - def sort(self, *exprs: Expr) -> DataFrame: + def sort(self, *exprs: Expr | SortExpr) -> DataFrame: """Sort the DataFrame by the specified sorting expressions. Note that any expression can be turned into a sort expression by @@ -211,8 +211,8 @@ def sort(self, *exprs: Expr) -> DataFrame: Returns: DataFrame after sorting. """ - exprs = [expr.expr for expr in exprs] - return DataFrame(self.df.sort(*exprs)) + exprs_raw = [sort_or_default(expr) for expr in exprs] + return DataFrame(self.df.sort(*exprs_raw)) def limit(self, count: int, offset: int = 0) -> DataFrame: """Return a new :py:class:`DataFrame` with a limited number of rows. diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index bd6a86fb..60f87cf0 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -159,6 +159,27 @@ ] +def expr_list_to_raw_expr_list( + expr_list: Optional[list[Expr]], +) -> Optional[list[expr_internal.Expr]]: + """Helper function to convert an optional list to raw expressions.""" + return [e.expr for e in expr_list] if expr_list is not None else None + + +def sort_or_default(e: Expr | SortExpr) -> expr_internal.SortExpr: + """Helper function to return a default Sort if an Expr is provided.""" + if isinstance(e, SortExpr): + return e.raw_sort + return SortExpr(e.expr, True, True).raw_sort + + +def sort_list_to_raw_sort_list( + sort_list: Optional[list[Expr | SortExpr]], +) -> Optional[list[expr_internal.SortExpr]]: + """Helper function to return an optional sort list to raw variant.""" + return [sort_or_default(e) for e in sort_list] if sort_list is not None else None + + class Expr: """Expression object. @@ -355,14 +376,14 @@ def alias(self, name: str) -> Expr: """Assign a name to the expression.""" return Expr(self.expr.alias(name)) - def sort(self, ascending: bool = True, nulls_first: bool = True) -> Expr: + def sort(self, ascending: bool = True, nulls_first: bool = True) -> SortExpr: """Creates a sort :py:class:`Expr` from an existing :py:class:`Expr`. Args: ascending: If true, sort in ascending order. nulls_first: Return null values first. """ - return Expr(self.expr.sort(ascending=ascending, nulls_first=nulls_first)) + return SortExpr(self.expr, ascending=ascending, nulls_first=nulls_first) def is_null(self) -> Expr: """Returns ``True`` if this expression is null.""" @@ -439,14 +460,14 @@ def column_name(self, plan: LogicalPlan) -> str: """Compute the output column name based on the provided logical plan.""" return self.expr.column_name(plan) - def order_by(self, *exprs: Expr) -> ExprFuncBuilder: + def order_by(self, *exprs: Expr | SortExpr) -> ExprFuncBuilder: """Set the ordering for a window or aggregate function. This function will create an :py:class:`ExprFuncBuilder` that can be used to set parameters for either window or aggregate functions. If used on any other type of expression, an error will be generated when ``build()`` is called. """ - return ExprFuncBuilder(self.expr.order_by(list(e.expr for e in exprs))) + return ExprFuncBuilder(self.expr.order_by([sort_or_default(e) for e in exprs])) def filter(self, filter: Expr) -> ExprFuncBuilder: """Filter an aggregate function. @@ -506,7 +527,9 @@ def order_by(self, *exprs: Expr) -> ExprFuncBuilder: Values given in ``exprs`` must be sort expressions. You can convert any other expression to a sort expression using `.sort()`. """ - return ExprFuncBuilder(self.builder.order_by(list(e.expr for e in exprs))) + return ExprFuncBuilder( + self.builder.order_by([sort_or_default(e) for e in exprs]) + ) def filter(self, filter: Expr) -> ExprFuncBuilder: """Filter values during aggregation.""" @@ -643,3 +666,22 @@ def end(self) -> Expr: Any non-matching cases will end in a `null` value. """ return Expr(self.case_builder.end()) + + +class SortExpr: + """Used to specify sorting on either a DataFrame or function""" + + def __init__(self, expr: Expr, ascending: bool, nulls_first: bool) -> None: + self.raw_sort = expr_internal.SortExpr(expr, ascending, nulls_first) + + def expr(self) -> Expr: + return Expr(self.raw_sort.expr()) + + def ascending(self) -> bool: + return self.raw_sort.ascending() + + def nulls_first(self) -> bool: + return self.raw_sort.nulls_first() + + def __repr__(self) -> str: + return self.raw_sort.__repr__() diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 163ff04e..e17449ae 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -19,7 +19,14 @@ from __future__ import annotations from datafusion._internal import functions as f, expr as expr_internal -from datafusion.expr import CaseBuilder, Expr, WindowFrame +from datafusion.expr import ( + CaseBuilder, + Expr, + WindowFrame, + SortExpr, + sort_list_to_raw_sort_list, + expr_list_to_raw_expr_list, +) from datafusion.context import SessionContext from datafusion.common import NullTreatment @@ -261,12 +268,6 @@ ] -def expr_list_to_raw_expr_list( - expr_list: Optional[list[Expr]], -) -> Optional[list[expr_internal.Expr]]: - return [e.expr for e in expr_list] if expr_list is not None else None - - def isnan(expr: Expr) -> Expr: """Returns true if a given number is +NaN or -NaN otherwise returns false.""" return Expr(f.isnan(expr.expr)) @@ -352,9 +353,9 @@ def concat_ws(separator: str, *args: Expr) -> Expr: return Expr(f.concat_ws(separator, args)) -def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> Expr: +def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> SortExpr: """Creates a new sort expression.""" - return Expr(f.order_by(expr.expr, ascending, nulls_first)) + return SortExpr(expr.expr, ascending=ascending, nulls_first=nulls_first) def alias(expr: Expr, name: str) -> Expr: @@ -405,7 +406,7 @@ def window( name: str, args: list[Expr], partition_by: list[Expr] | None = None, - order_by: list[Expr] | None = None, + order_by: list[Expr | SortExpr] | None = None, window_frame: WindowFrame | None = None, ctx: SessionContext | None = None, ) -> Expr: @@ -419,9 +420,9 @@ def window( """ args = [a.expr for a in args] partition_by = expr_list_to_raw_expr_list(partition_by) - order_by = expr_list_to_raw_expr_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) window_frame = window_frame.window_frame if window_frame is not None else None - return Expr(f.window(name, args, partition_by, order_by, window_frame, ctx)) + return Expr(f.window(name, args, partition_by, order_by_raw, window_frame, ctx)) # scalar functions @@ -1608,7 +1609,7 @@ def array_agg( expression: Expr, distinct: bool = False, filter: Optional[Expr] = None, - order_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, ) -> Expr: """Aggregate values into an array. @@ -1625,7 +1626,7 @@ def array_agg( filter: If provided, only compute against rows for which the filter is True order_by: Order the resultant array values """ - order_by_raw = expr_list_to_raw_expr_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None return Expr( @@ -2107,7 +2108,7 @@ def regr_syy( def first_value( expression: Expr, filter: Optional[Expr] = None, - order_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, null_treatment: NullTreatment = NullTreatment.RESPECT_NULLS, ) -> Expr: """Returns the first value in a group of values. @@ -2123,7 +2124,7 @@ def first_value( order_by: Set the ordering of the expression to evaluate null_treatment: Assign whether to respect or ignull null values. """ - order_by_raw = expr_list_to_raw_expr_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None return Expr( @@ -2139,7 +2140,7 @@ def first_value( def last_value( expression: Expr, filter: Optional[Expr] = None, - order_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, null_treatment: NullTreatment = NullTreatment.RESPECT_NULLS, ) -> Expr: """Returns the last value in a group of values. @@ -2155,7 +2156,7 @@ def last_value( order_by: Set the ordering of the expression to evaluate null_treatment: Assign whether to respect or ignull null values. """ - order_by_raw = expr_list_to_raw_expr_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None return Expr( @@ -2172,7 +2173,7 @@ def nth_value( expression: Expr, n: int, filter: Optional[Expr] = None, - order_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, null_treatment: NullTreatment = NullTreatment.RESPECT_NULLS, ) -> Expr: """Returns the n-th value in a group of values. @@ -2189,7 +2190,7 @@ def nth_value( order_by: Set the ordering of the expression to evaluate null_treatment: Assign whether to respect or ignull null values. """ - order_by_raw = expr_list_to_raw_expr_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None return Expr( @@ -2293,7 +2294,7 @@ def lead( shift_offset: int = 1, default_value: Optional[Any] = None, partition_by: Optional[list[Expr]] = None, - order_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, ) -> Expr: """Create a lead window function. @@ -2330,7 +2331,7 @@ def lead( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_cols = [col.expr for col in order_by] if order_by is not None else None + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.lead( @@ -2338,7 +2339,7 @@ def lead( shift_offset, default_value, partition_by=partition_cols, - order_by=order_cols, + order_by=order_by_raw, ) ) @@ -2348,7 +2349,7 @@ def lag( shift_offset: int = 1, default_value: Optional[Any] = None, partition_by: Optional[list[Expr]] = None, - order_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, ) -> Expr: """Create a lag window function. @@ -2382,7 +2383,7 @@ def lag( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_cols = [col.expr for col in order_by] if order_by is not None else None + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.lag( @@ -2390,14 +2391,14 @@ def lag( shift_offset, default_value, partition_by=partition_cols, - order_by=order_cols, + order_by=order_by_raw, ) ) def row_number( partition_by: Optional[list[Expr]] = None, - order_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, ) -> Expr: """Create a row number window function. @@ -2421,19 +2422,19 @@ def row_number( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_cols = [col.expr for col in order_by] if order_by is not None else None + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.row_number( partition_by=partition_cols, - order_by=order_cols, + order_by=order_by_raw, ) ) def rank( partition_by: Optional[list[Expr]] = None, - order_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, ) -> Expr: """Create a rank window function. @@ -2462,19 +2463,19 @@ def rank( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_cols = [col.expr for col in order_by] if order_by is not None else None + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.rank( partition_by=partition_cols, - order_by=order_cols, + order_by=order_by_raw, ) ) def dense_rank( partition_by: Optional[list[Expr]] = None, - order_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, ) -> Expr: """Create a dense_rank window function. @@ -2498,19 +2499,19 @@ def dense_rank( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_cols = [col.expr for col in order_by] if order_by is not None else None + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.dense_rank( partition_by=partition_cols, - order_by=order_cols, + order_by=order_by_raw, ) ) def percent_rank( partition_by: Optional[list[Expr]] = None, - order_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, ) -> Expr: """Create a percent_rank window function. @@ -2535,19 +2536,19 @@ def percent_rank( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_cols = [col.expr for col in order_by] if order_by is not None else None + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.percent_rank( partition_by=partition_cols, - order_by=order_cols, + order_by=order_by_raw, ) ) def cume_dist( partition_by: Optional[list[Expr]] = None, - order_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, ) -> Expr: """Create a cumulative distribution window function. @@ -2572,12 +2573,12 @@ def cume_dist( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_cols = [col.expr for col in order_by] if order_by is not None else None + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.cume_dist( partition_by=partition_cols, - order_by=order_cols, + order_by=order_by_raw, ) ) @@ -2585,7 +2586,7 @@ def cume_dist( def ntile( groups: int, partition_by: Optional[list[Expr]] = None, - order_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, ) -> Expr: """Create a n-tile window function. @@ -2613,13 +2614,13 @@ def ntile( partition_cols = ( [col.expr for col in partition_by] if partition_by is not None else None ) - order_cols = [col.expr for col in order_by] if order_by is not None else None + order_by_raw = sort_list_to_raw_sort_list(order_by) return Expr( f.ntile( Expr.literal(groups).expr, partition_by=partition_cols, - order_by=order_cols, + order_by=order_by_raw, ) ) @@ -2628,7 +2629,7 @@ def string_agg( expression: Expr, delimiter: str, filter: Optional[Expr] = None, - order_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, ) -> Expr: """Concatenates the input strings. @@ -2645,7 +2646,7 @@ def string_agg( filter: If provided, only compute against rows for which the filter is True order_by: Set the ordering of the expression to evaluate """ - order_by_raw = expr_list_to_raw_expr_list(order_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None return Expr( diff --git a/python/datafusion/tests/test_sql.py b/python/datafusion/tests/test_sql.py index e41d0100..cbb2e9f5 100644 --- a/python/datafusion/tests/test_sql.py +++ b/python/datafusion/tests/test_sql.py @@ -264,14 +264,17 @@ def test_execute(ctx, tmp_path): # count result = ctx.sql("SELECT COUNT(a) AS cnt FROM t WHERE a IS NOT NULL").collect() + ctx.sql("SELECT COUNT(a) AS cnt FROM t WHERE a IS NOT NULL").show() + + expected_schema = pa.schema([("cnt", pa.int64(), False)]) + expected_values = pa.array([7], type=pa.int64()) + expected = [pa.RecordBatch.from_arrays([expected_values], schema=expected_schema)] - expected = pa.array([7], pa.int64()) - expected = [pa.RecordBatch.from_arrays([expected], ["cnt"])] assert result == expected # where - expected = pa.array([2], pa.int64()) - expected = [pa.RecordBatch.from_arrays([expected], ["cnt"])] + expected_values = pa.array([2], type=pa.int64()) + expected = [pa.RecordBatch.from_arrays([expected_values], schema=expected_schema)] result = ctx.sql("SELECT COUNT(a) AS cnt FROM t WHERE a > 10").collect() assert result == expected diff --git a/python/datafusion/tests/test_wrapper_coverage.py b/python/datafusion/tests/test_wrapper_coverage.py index 4a47de2e..c53a89c5 100644 --- a/python/datafusion/tests/test_wrapper_coverage.py +++ b/python/datafusion/tests/test_wrapper_coverage.py @@ -39,7 +39,10 @@ def missing_exports(internal_obj, wrapped_obj) -> None: internal_attr = getattr(internal_obj, attr) wrapped_attr = getattr(wrapped_obj, attr) - assert wrapped_attr is not None if internal_attr is not None else True + if internal_attr is not None: + if wrapped_attr is None: + print("Missing attribute: ", attr) + assert False if attr in ["__self__", "__class__"]: continue diff --git a/src/dataframe.rs b/src/dataframe.rs index 07c5f7f3..69c02782 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -45,7 +45,10 @@ use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; use crate::sql::logical::PyLogicalPlan; use crate::utils::{get_tokio_runtime, wait_for_future}; -use crate::{errors::DataFusionError, expr::{PyExpr, sort_expr::PySortExpr}}; +use crate::{ + errors::DataFusionError, + expr::{sort_expr::PySortExpr, PyExpr}, +}; /// A PyDataFrame is a representation of a logical plan and an API to compose statements. /// Use it to build a plan and `.collect()` to execute the plan and collect the result. diff --git a/src/expr.rs b/src/expr.rs index 823b1143..c4ebedc6 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -94,7 +94,7 @@ pub mod unnest; pub mod unnest_expr; pub mod window; -use sort_expr::{PySortExpr, to_sort_expressions}; +use sort_expr::{to_sort_expressions, PySortExpr}; /// A PyExpr that can be used on a DataFrame #[pyclass(name = "Expr", module = "datafusion.expr", subclass)] @@ -152,7 +152,6 @@ impl PyExpr { Expr::Case(value) => Ok(case::PyCase::from(value.clone()).into_py(py)), Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_py(py)), Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_py(py)), - Expr::Sort(value) => Ok(sort_expr::PySortExpr::from(value.clone()).into_py(py)), Expr::ScalarFunction(value) => Err(py_unsupported_variant_err(format!( "Converting Expr::ScalarFunction to a Python object is not implemented: {:?}", value @@ -169,9 +168,9 @@ impl PyExpr { Expr::ScalarSubquery(value) => { Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_py(py)) } - Expr::Wildcard { qualifier } => Err(py_unsupported_variant_err(format!( - "Converting Expr::Wildcard to a Python object is not implemented : {:?}", - qualifier + Expr::Wildcard { qualifier, options } => Err(py_unsupported_variant_err(format!( + "Converting Expr::Wildcard to a Python object is not implemented : {:?} {:?}", + qualifier, options ))), Expr::GroupingSet(value) => { Ok(grouping_set::PyGroupingSet::from(value.clone()).into_py(py)) @@ -276,7 +275,7 @@ impl PyExpr { /// Create a sort PyExpr from an existing PyExpr. #[pyo3(signature = (ascending=true, nulls_first=true))] - pub fn sort(&self, ascending: bool, nulls_first: bool) -> PyExpr { + pub fn sort(&self, ascending: bool, nulls_first: bool) -> PySortExpr { self.expr.clone().sort(ascending, nulls_first).into() } @@ -314,7 +313,6 @@ impl PyExpr { | Expr::Case { .. } | Expr::Cast { .. } | Expr::TryCast { .. } - | Expr::Sort { .. } | Expr::ScalarFunction { .. } | Expr::AggregateFunction { .. } | Expr::WindowFunction { .. } @@ -378,7 +376,6 @@ impl PyExpr { | Expr::Negative(expr) | Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) - | Expr::Sort(Sort { expr, .. }) | Expr::InSubquery(InSubquery { expr, .. }) => Ok(vec![PyExpr::from(*expr.clone())]), // Expr variants containing a collection of Expr(s) for operands @@ -621,11 +618,6 @@ impl PyExpr { input_plan: &LogicalPlan, ) -> Result, DataFusionError> { match expr { - Expr::Sort(Sort { expr, .. }) => { - // DataFusion does not support create_name for sort expressions (since they never - // appear in projections) so we just delegate to the contained expression instead - Self::expr_to_field(expr, input_plan) - } Expr::Wildcard { .. } => { // Since * could be any of the valid column names just return the first one Ok(Arc::new(input_plan.schema().field(0).clone())) diff --git a/src/expr/sort_expr.rs b/src/expr/sort_expr.rs index 34ed91f6..12f74e4d 100644 --- a/src/expr/sort_expr.rs +++ b/src/expr/sort_expr.rs @@ -52,10 +52,7 @@ impl Display for PySortExpr { } pub fn to_sort_expressions(order_by: Vec) -> Vec { - order_by - .iter() - .map(|e| e.sort.clone()) - .collect() + order_by.iter().map(|e| e.sort.clone()).collect() } pub fn py_sort_expr_list(expr: &[SortExpr]) -> PyResult> { @@ -64,6 +61,17 @@ pub fn py_sort_expr_list(expr: &[SortExpr]) -> PyResult> { #[pymethods] impl PySortExpr { + #[new] + fn new(expr: PyExpr, asc: bool, nulls_first: bool) -> Self { + Self { + sort: SortExpr { + expr: expr.into(), + asc, + nulls_first, + }, + } + } + fn expr(&self) -> PyResult { Ok(self.sort.expr.clone().into()) } diff --git a/src/functions.rs b/src/functions.rs index 7765f522..d4ba67de 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -176,15 +176,11 @@ fn regexp_replace( /// Creates a new Sort Expr #[pyfunction] fn order_by(expr: PyExpr, asc: bool, nulls_first: bool) -> PyResult { - Ok( - PySortExpr::from( - datafusion::logical_expr::expr::Sort { - expr: expr.expr, - asc, - nulls_first, - } - ) - ) + Ok(PySortExpr::from(datafusion::logical_expr::expr::Sort { + expr: expr.expr, + asc, + nulls_first, + })) } /// Creates a new Alias Expr @@ -296,7 +292,7 @@ fn window( name: &str, args: Vec, partition_by: Option>, - order_by: Option>, + order_by: Option>, window_frame: Option, ctx: Option, ) -> PyResult { @@ -318,11 +314,7 @@ fn window( order_by: order_by .unwrap_or_default() .into_iter() - .map(|x| x.expr) - .map(|e| match e { - Expr::Sort(_) => e, - _ => e.sort(true, true), - }) + .map(|x| x.into()) .collect::>(), window_frame, null_treatment: None, @@ -690,20 +682,20 @@ pub fn first_value( } // nth_value requires a non-expr argument -// #[pyfunction] -// #[pyo3(signature = (expr, n, distinct=None, filter=None, order_by=None, null_treatment=None))] -// pub fn nth_value( -// expr: PyExpr, -// n: i64, -// distinct: Option, -// filter: Option, -// order_by: Option>, -// null_treatment: Option, -// ) -> PyResult { -// // @todo: Commenting this function out for now as it requires some reworking -// let agg_fn = datafusion::functions_aggregate::nth_value::nth_value(vec![expr.expr, lit(n)]); -// add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) -// } +#[pyfunction] +#[pyo3(signature = (expr, n, distinct=None, filter=None, order_by=None, null_treatment=None))] +pub fn nth_value( + expr: PyExpr, + n: i64, + distinct: Option, + filter: Option, + order_by: Option>, + null_treatment: Option, +) -> PyResult { + // @todo: Commenting this function out for now as it requires some reworking + let agg_fn = datafusion::functions_aggregate::nth_value::nth_value(expr.expr, n, vec![]); + add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) +} // string_agg requires a non-expr argument #[pyfunction] @@ -776,7 +768,21 @@ pub fn lag( #[pyfunction] #[pyo3(signature = (partition_by=None, order_by=None))] -pub fn rank(partition_by: Option>, order_by: Option>) -> PyResult { +pub fn row_number( + partition_by: Option>, + order_by: Option>, +) -> PyResult { + let window_fn = datafusion::functions_window::expr_fn::row_number(); + + add_builder_fns_to_window(window_fn, partition_by, order_by) +} + +#[pyfunction] +#[pyo3(signature = (partition_by=None, order_by=None))] +pub fn rank( + partition_by: Option>, + order_by: Option>, +) -> PyResult { let window_fn = window_function::rank(); add_builder_fns_to_window(window_fn, partition_by, order_by) @@ -969,7 +975,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(regr_syy))?; m.add_wrapped(wrap_pyfunction!(first_value))?; m.add_wrapped(wrap_pyfunction!(last_value))?; - // m.add_wrapped(wrap_pyfunction!(nth_value))?; + m.add_wrapped(wrap_pyfunction!(nth_value))?; m.add_wrapped(wrap_pyfunction!(bit_and))?; m.add_wrapped(wrap_pyfunction!(bit_or))?; m.add_wrapped(wrap_pyfunction!(bit_xor))?; @@ -1017,6 +1023,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(lead))?; m.add_wrapped(wrap_pyfunction!(lag))?; m.add_wrapped(wrap_pyfunction!(rank))?; + m.add_wrapped(wrap_pyfunction!(row_number))?; m.add_wrapped(wrap_pyfunction!(dense_rank))?; m.add_wrapped(wrap_pyfunction!(percent_rank))?; m.add_wrapped(wrap_pyfunction!(cume_dist))?; diff --git a/src/pyarrow_filter_expression.rs b/src/pyarrow_filter_expression.rs index 6e2a45e1..0f97ea44 100644 --- a/src/pyarrow_filter_expression.rs +++ b/src/pyarrow_filter_expression.rs @@ -27,7 +27,7 @@ use datafusion::logical_expr::{expr::InList, Between, BinaryExpr, Expr, Operator use crate::errors::DataFusionError; -#[derive(Debug, Clone)] +#[derive(Debug)] #[repr(transparent)] pub(crate) struct PyArrowFilterExpression(PyObject); diff --git a/src/udf.rs b/src/udf.rs index 4d57f87b..7d5db2f9 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -20,8 +20,8 @@ use std::sync::Arc; use pyo3::{prelude::*, types::PyTuple}; use datafusion::arrow::array::{make_array, Array, ArrayData, ArrayRef}; -use datafusion::arrow::pyarrow::FromPyArrow; use datafusion::arrow::datatypes::DataType; +use datafusion::arrow::pyarrow::FromPyArrow; use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; use datafusion::error::DataFusionError; use datafusion::logical_expr::create_udf; From c89357ea7f702dca7986e908be32783be3cd8d3b Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 10 Sep 2024 12:01:21 -0700 Subject: [PATCH 13/26] remove unused import --- src/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expr.rs b/src/expr.rs index c4ebedc6..40c4266b 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -28,7 +28,7 @@ use datafusion::arrow::pyarrow::PyArrowType; use datafusion::functions::core::expr_ext::FieldAccessor; use datafusion::logical_expr::{ col, - expr::{AggregateFunction, InList, InSubquery, ScalarFunction, Sort, WindowFunction}, + expr::{AggregateFunction, InList, InSubquery, ScalarFunction, WindowFunction}, lit, Between, BinaryExpr, Case, Cast, Expr, Like, Operator, TryCast, }; use datafusion::scalar::ScalarValue; From 8255f09eaeb59d0380644b8348d78dca3b6b701b Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 10 Sep 2024 12:02:55 -0700 Subject: [PATCH 14/26] expr.display_name is deprecated, used format!() + schema_name() instead --- src/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expr.rs b/src/expr.rs index 40c4266b..32302e74 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -190,7 +190,7 @@ impl PyExpr { /// Returns the name of this expression as it should appear in a schema. This name /// will not include any CAST expressions. fn display_name(&self) -> PyResult { - Ok(self.expr.display_name()?) + Ok(format!("{}", self.expr.schema_name())) } /// Returns a full and complete string representation of this expression. From df4605400f6efb488cd3b8a22c2720877b10ff2b Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 10 Sep 2024 12:03:50 -0700 Subject: [PATCH 15/26] expr.canonical_name() is deprecated, use format!() expr instead --- src/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expr.rs b/src/expr.rs index 32302e74..4841b26b 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -195,7 +195,7 @@ impl PyExpr { /// Returns a full and complete string representation of this expression. fn canonical_name(&self) -> PyResult { - Ok(self.expr.canonical_name()) + Ok(format!("{}", self.expr)) } /// Returns the name of the Expr variant. From 6c276149237d65801d627de4cb3e5ecd4687d53c Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 10 Sep 2024 12:06:39 -0700 Subject: [PATCH 16/26] remove comment --- src/functions.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/functions.rs b/src/functions.rs index d4ba67de..32f6519f 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -692,7 +692,6 @@ pub fn nth_value( order_by: Option>, null_treatment: Option, ) -> PyResult { - // @todo: Commenting this function out for now as it requires some reworking let agg_fn = datafusion::functions_aggregate::nth_value::nth_value(expr.expr, n, vec![]); add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) } From 70546e2ba1952b503f4e16a3fc18828c82d175e6 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 10 Sep 2024 12:50:22 -0700 Subject: [PATCH 17/26] fix tuple extraction in dataframe.__getitem__() --- src/dataframe.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/dataframe.rs b/src/dataframe.rs index 69c02782..6100ebd0 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -36,7 +36,7 @@ use datafusion::prelude::*; use pyo3::exceptions::{PyTypeError, PyValueError}; use pyo3::prelude::*; use pyo3::pybacked::PyBackedStr; -use pyo3::types::{PyCapsule, PyTuple}; +use pyo3::types::{PyCapsule, PyTuple, PyTupleMethods}; use tokio::task::JoinHandle; use crate::errors::py_datafusion_err; @@ -73,14 +73,14 @@ impl PyDataFrame { if let Ok(key) = key.extract::() { // df[col] self.select_columns(vec![key]) - // } else if let Ok(tuple) = key.extract::<&PyTuple>() { - // @todo: make this branch work - // // df[col1, col2, col3] - // let keys = tuple - // .iter() - // .map(|item| item.extract::()) - // .collect::>>()?; - // self.select_columns(keys) + } else if let Ok(tuple) = key.extract::>() { + // df[col1, col2, col3] + let tuple = tuple.bind(key.py()); + let keys = tuple + .iter() + .map(|item| item.extract::()) + .collect::>>()?; + self.select_columns(keys) } else if let Ok(keys) = key.extract::>() { // df[[col1, col2, col3]] self.select_columns(keys) From 836061fc744746070feded92af901ceb29b3fb7b Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 10 Sep 2024 12:56:20 -0700 Subject: [PATCH 18/26] remove unneeded import --- python/datafusion/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index e17449ae..0401afbc 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -18,7 +18,7 @@ from __future__ import annotations -from datafusion._internal import functions as f, expr as expr_internal +from datafusion._internal import functions as f from datafusion.expr import ( CaseBuilder, Expr, From 4945661c0ea2dbe6965ba90d6c1a43f26c109f28 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 10 Sep 2024 13:04:23 -0700 Subject: [PATCH 19/26] Add docstring comments to SortExpr python class --- python/datafusion/expr.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 60f87cf0..78acee31 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -84,7 +84,7 @@ ScalarVariable = expr_internal.ScalarVariable SimilarTo = expr_internal.SimilarTo Sort = expr_internal.Sort -SortExpr = expr_internal.SortExpr +# SortExpr = expr_internal.SortExpr Subquery = expr_internal.Subquery SubqueryAlias = expr_internal.SubqueryAlias TableScan = expr_internal.TableScan @@ -669,19 +669,24 @@ def end(self) -> Expr: class SortExpr: - """Used to specify sorting on either a DataFrame or function""" + """Used to specify sorting on either a DataFrame or function.""" def __init__(self, expr: Expr, ascending: bool, nulls_first: bool) -> None: + """This constructor should not be called by the end user.""" self.raw_sort = expr_internal.SortExpr(expr, ascending, nulls_first) def expr(self) -> Expr: + """Return the raw expr backing teh SortExpr.""" return Expr(self.raw_sort.expr()) def ascending(self) -> bool: + """Return ascending property.""" return self.raw_sort.ascending() def nulls_first(self) -> bool: + """Return nulls_first property.""" return self.raw_sort.nulls_first() def __repr__(self) -> str: + """Generate a string representation of this expression.""" return self.raw_sort.__repr__() From cd04c44dd33acbf4bf5edf1da528fbf213a5202e Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 10 Sep 2024 13:27:16 -0700 Subject: [PATCH 20/26] change extract() to downcast() Co-authored-by: Michael J Ward --- src/dataframe.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/dataframe.rs b/src/dataframe.rs index 6100ebd0..1f7f2e64 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -73,9 +73,8 @@ impl PyDataFrame { if let Ok(key) = key.extract::() { // df[col] self.select_columns(vec![key]) - } else if let Ok(tuple) = key.extract::>() { + } else if let Ok(tuple) = key.downcast::() { // df[col1, col2, col3] - let tuple = tuple.bind(key.py()); let keys = tuple .iter() .map(|item| item.extract::()) From afcc9f1e099a80fb00886e95fa212414b0071517 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Sat, 10 Aug 2024 15:33:41 -0500 Subject: [PATCH 21/26] deprecate Expr::display_name Ref: https://github.com/apache/datafusion/pull/11797 --- python/datafusion/expr.py | 11 ++++++++++- python/datafusion/tests/test_expr.py | 20 ++++++++++++++++++++ src/expr.rs | 2 +- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 78acee31..7d8a7221 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -29,6 +29,7 @@ ) from datafusion.common import NullTreatment, RexType, DataTypeMap from typing import Any, Optional, Type +from typing_extensions import deprecated import pyarrow as pa # The following are imported from the internal representation. We may choose to @@ -195,12 +196,20 @@ def to_variant(self) -> Any: """Convert this expression into a python object if possible.""" return self.expr.to_variant() + @deprecated("display_name() is deprecated. Use :py:meth:`~Expr.schema_name` instead") def display_name(self) -> str: """Returns the name of this expression as it should appear in a schema. This name will not include any CAST expressions. """ - return self.expr.display_name() + return self.schema_name() + + def schema_name(self) -> str: + """Returns the name of this expression as it should appear in a schema. + + This name will not include any CAST expressions. + """ + return self.expr.schema_name() def canonical_name(self) -> str: """Returns a complete string representation of this expression.""" diff --git a/python/datafusion/tests/test_expr.py b/python/datafusion/tests/test_expr.py index 056d2ea0..1aa93110 100644 --- a/python/datafusion/tests/test_expr.py +++ b/python/datafusion/tests/test_expr.py @@ -192,3 +192,23 @@ def test_expr_getitem() -> None: assert names == ["Alice", "Bob", "Charlie", None] assert array_values == [2, 5, None, None] + + +def test_display_name_deprecation(): + import warnings + expr = col("foo") + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered + warnings.simplefilter("always") + + # should trigger warning + name = expr.display_name() + + # Verify some things + assert len(w) == 1 + assert issubclass(w[-1].category, DeprecationWarning) + assert "deprecated" in str(w[-1].message) + + # returns appropriate result + assert name == expr.schema_name() + assert name == "foo" diff --git a/src/expr.rs b/src/expr.rs index 4841b26b..e1edbe86 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -189,7 +189,7 @@ impl PyExpr { /// Returns the name of this expression as it should appear in a schema. This name /// will not include any CAST expressions. - fn display_name(&self) -> PyResult { + fn schema_name(&self) -> PyResult { Ok(format!("{}", self.expr.schema_name())) } From 7f6187ac2641ffdef31acb7a1cc625327fa4d1a4 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 10 Sep 2024 14:11:43 -0700 Subject: [PATCH 22/26] fix lint errors --- python/datafusion/expr.py | 18 ++++++++++-------- python/datafusion/tests/test_expr.py | 14 +++++++++----- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 7d8a7221..6edf1b1d 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -22,15 +22,15 @@ from __future__ import annotations -from ._internal import ( - expr as expr_internal, - LogicalPlan, - functions as functions_internal, -) -from datafusion.common import NullTreatment, RexType, DataTypeMap from typing import Any, Optional, Type -from typing_extensions import deprecated + import pyarrow as pa +from datafusion.common import DataTypeMap, NullTreatment, RexType +from typing_extensions import deprecated + +from ._internal import LogicalPlan +from ._internal import expr as expr_internal +from ._internal import functions as functions_internal # The following are imported from the internal representation. We may choose to # give these all proper wrappers, or to simply leave as is. These were added @@ -196,7 +196,9 @@ def to_variant(self) -> Any: """Convert this expression into a python object if possible.""" return self.expr.to_variant() - @deprecated("display_name() is deprecated. Use :py:meth:`~Expr.schema_name` instead") + @deprecated( + "display_name() is deprecated. Use :py:meth:`~Expr.schema_name` instead" + ) def display_name(self) -> str: """Returns the name of this expression as it should appear in a schema. diff --git a/python/datafusion/tests/test_expr.py b/python/datafusion/tests/test_expr.py index 1aa93110..b58177f1 100644 --- a/python/datafusion/tests/test_expr.py +++ b/python/datafusion/tests/test_expr.py @@ -15,18 +15,21 @@ # specific language governing permissions and limitations # under the License. +import pyarrow +import pytest from datafusion import SessionContext, col -from datafusion.expr import Column, Literal, BinaryExpr, AggregateFunction from datafusion.expr import ( - Projection, - Filter, Aggregate, + AggregateFunction, + BinaryExpr, + Column, + Filter, Limit, + Literal, + Projection, Sort, TableScan, ) -import pyarrow -import pytest @pytest.fixture @@ -196,6 +199,7 @@ def test_expr_getitem() -> None: def test_display_name_deprecation(): import warnings + expr = col("foo") with warnings.catch_warnings(record=True) as w: # Cause all warnings to always be triggered From 8aebaea372f635d2d15788c4ee857543b3bb9e93 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Wed, 11 Sep 2024 11:57:03 -0700 Subject: [PATCH 23/26] update datafusion commit hash --- Cargo.lock | 54 +++++++++++++++++++++++++++--------------------------- Cargo.toml | 4 ++-- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bb1d800b..ff53261e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -84,9 +84,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.87" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8" +checksum = "4e1496f8fb1fbf272686b8d37f523dab3e4a7443300055e74cdaa449f3114356" [[package]] name = "apache-avro" @@ -747,7 +747,7 @@ dependencies = [ [[package]] name = "datafusion" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "ahash", "apache-avro", @@ -805,7 +805,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "arrow-schema", "async-trait", @@ -819,7 +819,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "ahash", "apache-avro", @@ -844,7 +844,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "log", "tokio", @@ -853,7 +853,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "arrow", "chrono", @@ -873,7 +873,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "ahash", "arrow", @@ -894,7 +894,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "arrow", "datafusion-common", @@ -904,7 +904,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "arrow", "arrow-buffer", @@ -930,7 +930,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "ahash", "arrow", @@ -950,7 +950,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "ahash", "arrow", @@ -963,7 +963,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "arrow", "arrow-array", @@ -985,7 +985,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "datafusion-common", "datafusion-expr", @@ -996,7 +996,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "arrow", "async-trait", @@ -1015,7 +1015,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "ahash", "arrow", @@ -1046,7 +1046,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "ahash", "arrow", @@ -1059,7 +1059,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "arrow-schema", "datafusion-common", @@ -1072,7 +1072,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "ahash", "arrow", @@ -1130,7 +1130,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "arrow", "arrow-array", @@ -1146,7 +1146,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=c71a9d7508e37e5d082e22d2953a12b61d290df5#c71a9d7508e37e5d082e22d2953a12b61d290df5" +source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" dependencies = [ "arrow-buffer", "async-recursion", @@ -2620,9 +2620,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.36" +version = "0.38.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f55e80d50763938498dd5ebb18647174e0c76dc38c5505294bb224624f30f36" +checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" dependencies = [ "bitflags 2.6.0", "errno", @@ -2633,9 +2633,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.12" +version = "0.23.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c58f8c84392efc0a126acce10fa59ff7b3d2ac06ab451a33f2741989b806b044" +checksum = "f2dabaac7466917e566adb06783a81ca48944c6898a1b08b9374106dd671f4c8" dependencies = [ "once_cell", "ring", @@ -3356,9 +3356,9 @@ checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "unicode-normalization" diff --git a/Cargo.toml b/Cargo.toml index a7fae57f..4612bb76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,5 +64,5 @@ lto = true codegen-units = 1 [patch.crates-io] -datafusion = { git = "https://github.com/apache/datafusion.git", rev = "c71a9d7508e37e5d082e22d2953a12b61d290df5" } -datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "c71a9d7508e37e5d082e22d2953a12b61d290df5" } +datafusion = { git = "https://github.com/apache/datafusion.git", rev = "13dc8a61ee9f82965cea1ee1e751455e77794f24" } +datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "13dc8a61ee9f82965cea1ee1e751455e77794f24" } From afa303f239023f17d937fa6724410b4c1d6b49c7 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 17 Sep 2024 10:57:14 -0700 Subject: [PATCH 24/26] fix type in cargo file for arrow features --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4612bb76..8688dfdc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,7 +37,7 @@ substrait = ["dep:datafusion-substrait"] tokio = { version = "1.39", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } -arrow = { version = "53", feature = ["pyarrow"] } +arrow = { version = "53", features = ["pyarrow"] } datafusion = { version = "41.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } datafusion-substrait = { version = "41.0.0", optional = true } prost = "0.13" # keep in line with `datafusion-substrait` From f4574ec2aafd7100df3b1d731fae2137af9ddf69 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 17 Sep 2024 12:58:01 -0700 Subject: [PATCH 25/26] upgrade to datafusion 42 --- Cargo.lock | 143 ++++++++++++++++++++++++++++++----------------------- Cargo.toml | 8 +-- 2 files changed, 83 insertions(+), 68 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ff53261e..d06073b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -84,9 +84,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.88" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e1496f8fb1fbf272686b8d37f523dab3e4a7443300055e74cdaa449f3114356" +checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" [[package]] name = "apache-avro" @@ -118,9 +118,9 @@ dependencies = [ [[package]] name = "arrayref" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" [[package]] name = "arrayvec" @@ -542,9 +542,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.18" +version = "1.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62ac837cdb5cb22e10a256099b4fc502b1dfe560cb282963a974d7abd80e476" +checksum = "45bcde016d64c21da4be18b655631e5ab6d3107607e71a73a9f53eb48aae23fb" dependencies = [ "jobserver", "libc", @@ -746,8 +746,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee907b081e45e1d14e1f327e89ef134f91fcebad0bfc2dc229fa9f6044379682" dependencies = [ "ahash", "apache-avro", @@ -804,8 +805,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c2b914f6e33c429af7d8696c72a47ed9225d7e2b82c747ebdfa2408ed53579f" dependencies = [ "arrow-schema", "async-trait", @@ -818,8 +820,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a84f8e76330c582a6b8ada0b2c599ca46cfe46b7585e458fc3f4092bc722a18" dependencies = [ "ahash", "apache-avro", @@ -843,8 +846,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf08cc30d92720d557df13bd5a5696213bd5ea0f38a866d8d85055d866fba774" dependencies = [ "log", "tokio", @@ -852,8 +856,9 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86bc4183d5c45b9f068a6f351678a0d1eb1225181424542bb75db18ec280b822" dependencies = [ "arrow", "chrono", @@ -872,8 +877,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "202119ce58e4d103e37ae64aab40d4e574c97bdd2bea994bf307b175fcbfa74d" dependencies = [ "ahash", "arrow", @@ -893,8 +899,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8b181ce8569216abb01ef3294aa16c0a40d7d39350c2ff01ede00f167a535f2" dependencies = [ "arrow", "datafusion-common", @@ -903,8 +910,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e4124b8066444e05a24472f852e94cf56546c0f4d92d00f018f207216902712" dependencies = [ "arrow", "arrow-buffer", @@ -929,8 +937,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94acdac235ea21810150a89751617ef2db7e32eba27f54be48a81bde2bfe119" dependencies = [ "ahash", "arrow", @@ -949,8 +958,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c9ea085bbf900bf16e2ca0f56fc56236b2e4f2e1a2cccb67bcd83c5ab4ad0ef" dependencies = [ "ahash", "arrow", @@ -962,8 +972,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c882e61665ed60c5ce9b061c1e587aeb8ae5ae4bcb5e5f2465139ab25328e0f" dependencies = [ "arrow", "arrow-array", @@ -984,8 +995,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98a354ce96df3ca6d025093adac9fd55ca09931c9b6f2630140721a95873fde4" dependencies = [ "datafusion-common", "datafusion-expr", @@ -995,8 +1007,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf677c74fb7b5a1899ef52709e4a70fff3ed80bdfb4bbe495909810e83d5f39" dependencies = [ "arrow", "async-trait", @@ -1014,8 +1027,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30b077999f6eb6c43d6b25bc66332a3be2f693c382840f008dd763b8540f9530" dependencies = [ "ahash", "arrow", @@ -1045,8 +1059,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce847f885c2b13bbe29f5c8b7948797131aa470af6e16d2a94f4428b4f4f1bd" dependencies = [ "ahash", "arrow", @@ -1058,8 +1073,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d13238e3b9fdd62a4c18760bfef714bb990d1e1d3430e9f416aae4b3cfaa71af" dependencies = [ "arrow-schema", "datafusion-common", @@ -1071,8 +1087,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faba6f55a7eaf0241d07d12c2640de52742646b10f754485d5192bdfe2c9ceae" dependencies = [ "ahash", "arrow", @@ -1129,8 +1146,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dad8d96a9b52e1aa24f9373696a815be828193efce7cb0bbd2140b6bb67d1819" dependencies = [ "arrow", "arrow-array", @@ -1145,8 +1163,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "41.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=13dc8a61ee9f82965cea1ee1e751455e77794f24#13dc8a61ee9f82965cea1ee1e751455e77794f24" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f92b1b80e98bf5a9921bf118816e0e766d18527e343153321fcccfe4d68c5c45" dependencies = [ "arrow-buffer", "async-recursion", @@ -1538,9 +1557,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.60" +version = "0.1.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -2313,9 +2332,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.22.2" +version = "0.22.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "831e8e819a138c36e212f3af3fd9eeffed6bf1510a805af35b0edee5ffa59433" +checksum = "15ee168e30649f7f234c3d49ef5a7a6cbf5134289bc46c29ff3155fa3221c225" dependencies = [ "cfg-if", "indoc", @@ -2331,9 +2350,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.2" +version = "0.22.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e8730e591b14492a8945cdff32f089250b05f5accecf74aeddf9e8272ce1fa8" +checksum = "e61cef80755fe9e46bb8a0b8f20752ca7676dcc07a5277d8b7768c6172e529b3" dependencies = [ "once_cell", "target-lexicon", @@ -2341,9 +2360,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.2" +version = "0.22.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e97e919d2df92eb88ca80a037969f44e5e70356559654962cbb3316d00300c6" +checksum = "67ce096073ec5405f5ee2b8b31f03a68e02aa10d5d4f565eca04acc41931fa1c" dependencies = [ "libc", "pyo3-build-config", @@ -2351,9 +2370,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.2" +version = "0.22.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb57983022ad41f9e683a599f2fd13c3664d7063a3ac5714cae4b7bee7d3f206" +checksum = "2440c6d12bc8f3ae39f1e775266fa5122fd0c8891ce7520fa6048e683ad3de28" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2363,9 +2382,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.22.2" +version = "0.22.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec480c0c51ddec81019531705acac51bcdbeae563557c982aa8263bb96880372" +checksum = "1be962f0e06da8f8465729ea2cb71a416d2257dff56cbe40a70d3e62a93ae5d1" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -2479,9 +2498,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.3" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" +checksum = "0884ad60e090bf1345b93da0a5de8923c93884cd03f40dfcfddd3b4bee661853" dependencies = [ "bitflags 2.6.0", ] @@ -3362,18 +3381,18 @@ checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "unicode-normalization" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" dependencies = [ "tinyvec", ] [[package]] name = "unicode-segmentation" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" diff --git a/Cargo.toml b/Cargo.toml index 8688dfdc..11dccc4f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,8 +38,8 @@ tokio = { version = "1.39", features = ["macros", "rt", "rt-multi-thread", "sync rand = "0.8" pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } arrow = { version = "53", features = ["pyarrow"] } -datafusion = { version = "41.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } -datafusion-substrait = { version = "41.0.0", optional = true } +datafusion = { version = "42.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } +datafusion-substrait = { version = "42.0.0", optional = true } prost = "0.13" # keep in line with `datafusion-substrait` prost-types = "0.13" # keep in line with `datafusion-substrait` uuid = { version = "1.9", features = ["v4"] } @@ -62,7 +62,3 @@ crate-type = ["cdylib", "rlib"] [profile.release] lto = true codegen-units = 1 - -[patch.crates-io] -datafusion = { git = "https://github.com/apache/datafusion.git", rev = "13dc8a61ee9f82965cea1ee1e751455e77794f24" } -datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "13dc8a61ee9f82965cea1ee1e751455e77794f24" } From 88ccbd85300547cb920e237d2ffb921952519e3e Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 17 Sep 2024 13:26:57 -0700 Subject: [PATCH 26/26] cleanup --- python/datafusion/expr.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 6edf1b1d..ea1cbdb3 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -85,7 +85,6 @@ ScalarVariable = expr_internal.ScalarVariable SimilarTo = expr_internal.SimilarTo Sort = expr_internal.Sort -# SortExpr = expr_internal.SortExpr Subquery = expr_internal.Subquery SubqueryAlias = expr_internal.SubqueryAlias TableScan = expr_internal.TableScan @@ -687,7 +686,7 @@ def __init__(self, expr: Expr, ascending: bool, nulls_first: bool) -> None: self.raw_sort = expr_internal.SortExpr(expr, ascending, nulls_first) def expr(self) -> Expr: - """Return the raw expr backing teh SortExpr.""" + """Return the raw expr backing the SortExpr.""" return Expr(self.raw_sort.expr()) def ascending(self) -> bool: