diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 49e2478d842d..e8f97824b6c7 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -74,16 +74,16 @@ checksum = "990dfa1a9328504aa135820da1c95066537b69ad94c04881b785f64328e0fa6b" dependencies = [ "ahash", "arrow-arith", - "arrow-array", - "arrow-buffer", + "arrow-array 36.0.0", + "arrow-buffer 36.0.0", "arrow-cast", "arrow-csv", - "arrow-data", + "arrow-data 36.0.0", "arrow-ipc", "arrow-json", "arrow-ord", "arrow-row", - "arrow-schema", + "arrow-schema 36.0.0", "arrow-select", "arrow-string", ] @@ -94,15 +94,32 @@ version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2b2e52de0ab54173f9b08232b7184c26af82ee7ab4ac77c83396633c90199fa" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 36.0.0", + "arrow-buffer 36.0.0", + "arrow-data 36.0.0", + "arrow-schema 36.0.0", "chrono", "half", "num", ] +[[package]] +name = "arrow-array" +version = "34.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d35d5475e65c57cffba06d0022e3006b677515f99b54af33a7cd54f6cdd4a5b5" +dependencies = [ + "ahash", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.13.2", + "num", +] + [[package]] name = "arrow-array" version = "36.0.0" @@ -110,9 +127,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e10849b60c17dbabb334be1f4ef7550701aa58082b71335ce1ed586601b2f423" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 36.0.0", + "arrow-data 36.0.0", + "arrow-schema 36.0.0", "chrono", "chrono-tz", "half", @@ -120,6 +137,16 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "34.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b4ec72eda7c0207727df96cf200f539749d736b21f3e782ece113e18c1a0a7" +dependencies = [ + "half", + "num", +] + [[package]] name = "arrow-buffer" version = "36.0.0" @@ -136,10 +163,10 @@ version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b88897802515d7b193e38b27ddd9d9e43923d410a9e46307582d756959ee9595" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 36.0.0", + "arrow-buffer 36.0.0", + "arrow-data 36.0.0", + "arrow-schema 36.0.0", "arrow-select", "chrono", "comfy-table", @@ -153,11 +180,11 @@ version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c8220d9741fc37961262710ceebd8451a5b393de57c464f0267ffdda1775c0a" dependencies = [ - "arrow-array", - "arrow-buffer", + "arrow-array 36.0.0", + "arrow-buffer 36.0.0", "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-data 36.0.0", + "arrow-schema 36.0.0", "chrono", "csv", "csv-core", @@ -166,14 +193,26 @@ dependencies = [ "regex", ] +[[package]] +name = "arrow-data" +version = "34.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27cc673ee6989ea6e4b4e8c7d461f7e06026a096c8f0b1a7288885ff71ae1e56" +dependencies = [ + "arrow-buffer 34.0.0", + "arrow-schema 34.0.0", + "half", + "num", +] + [[package]] name = "arrow-data" version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "533f937efa1aaad9dc86f6a0e382c2fa736a4943e2090c946138079bdf060cef" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 36.0.0", + "arrow-schema 36.0.0", "half", "num", ] @@ -184,11 +223,11 @@ version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18b75296ff01833f602552dff26a423fc213db8e5049b540ca4a00b1c957e41c" dependencies = [ - "arrow-array", - "arrow-buffer", + "arrow-array 36.0.0", + "arrow-buffer 36.0.0", "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-data 36.0.0", + "arrow-schema 36.0.0", "flatbuffers", ] @@ -198,11 +237,11 @@ version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e501d3de4d612c90677594896ca6c0fa075665a7ff980dc4189bb531c17e19f6" dependencies = [ - "arrow-array", - "arrow-buffer", + "arrow-array 36.0.0", + "arrow-buffer 36.0.0", "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-data 36.0.0", + "arrow-schema 36.0.0", "chrono", "half", "indexmap", @@ -217,10 +256,10 @@ version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d2671eb3793f9410230ac3efb0e6d36307be8a2dac5fad58ac9abde8e9f01e" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 36.0.0", + "arrow-buffer 36.0.0", + "arrow-data 36.0.0", + "arrow-schema 36.0.0", "arrow-select", "half", "num", @@ -233,14 +272,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc11fa039338cebbf4e29cf709c8ac1d6a65c7540063d4a25f991ab255ca85c8" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 36.0.0", + "arrow-buffer 36.0.0", + "arrow-data 36.0.0", + "arrow-schema 36.0.0", "half", "hashbrown 0.13.2", ] +[[package]] +name = "arrow-schema" +version = "34.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64951898473bfb8e22293e83a44f02874d2257514d49cd95f9aa4afcff183fbc" + [[package]] name = "arrow-schema" version = "36.0.0" @@ -253,10 +298,10 @@ version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "163e35de698098ff5f5f672ada9dc1f82533f10407c7a11e2cd09f3bcf31d18a" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 36.0.0", + "arrow-buffer 36.0.0", + "arrow-data 36.0.0", + "arrow-schema 36.0.0", "num", ] @@ -266,10 +311,10 @@ version = "36.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfdfbed1b10209f0dc68e6aa4c43dc76079af65880965c7c3b73f641f23d4aba" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 36.0.0", + "arrow-buffer 36.0.0", + "arrow-data 36.0.0", + "arrow-schema 36.0.0", "arrow-select", "regex", "regex-syntax", @@ -295,13 +340,13 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.67" +version = "0.1.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86ea188f25f0255d8f92797797c97ebf5631fa88178beb1a46fdf5622c9a00e4" +checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.8", + "syn 2.0.11", ] [[package]] @@ -582,9 +627,9 @@ checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" [[package]] name = "cpufeatures" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" +checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" dependencies = [ "libc", ] @@ -637,9 +682,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.93" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9c00419335c41018365ddf7e4d5f1c12ee3659ddcf3e01974650ba1de73d038" +checksum = "f61f1b6389c3fe1c316bf8a4dccc90a38208354b330925bce1f74a6c4756eb93" dependencies = [ "cc", "cxxbridge-flags", @@ -649,9 +694,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.93" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb8307ad413a98fff033c8545ecf133e3257747b3bae935e7602aab8aa92d4ca" +checksum = "12cee708e8962df2aeb38f594aae5d827c022b6460ac71a7a3e2c3c2aae5a07b" dependencies = [ "cc", "codespan-reporting", @@ -659,24 +704,24 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.8", + "syn 2.0.11", ] [[package]] name = "cxxbridge-flags" -version = "1.0.93" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edc52e2eb08915cb12596d29d55f0b5384f00d697a646dbd269b6ecb0fbd9d31" +checksum = "7944172ae7e4068c533afbb984114a56c46e9ccddda550499caa222902c7f7bb" [[package]] name = "cxxbridge-macro" -version = "1.0.93" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "631569015d0d8d54e6c241733f944042623ab6df7bc3be7466874b05fcdb1c5f" +checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.8", + "syn 2.0.11", ] [[package]] @@ -761,7 +806,7 @@ name = "datafusion-common" version = "21.0.0" dependencies = [ "arrow", - "arrow-array", + "arrow-array 36.0.0", "chrono", "num_cpus", "object_store", @@ -817,8 +862,9 @@ version = "21.0.0" dependencies = [ "ahash", "arrow", - "arrow-buffer", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 36.0.0", + "arrow-schema 36.0.0", "blake2", "blake3", "chrono", @@ -854,7 +900,7 @@ dependencies = [ name = "datafusion-sql" version = "21.0.0" dependencies = [ - "arrow-schema", + "arrow-schema 36.0.0", "datafusion-common", "datafusion-expr", "log", @@ -955,13 +1001,13 @@ dependencies = [ [[package]] name = "errno" -version = "0.2.8" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" dependencies = [ "errno-dragonfly", "libc", - "winapi", + "windows-sys", ] [[package]] @@ -995,13 +1041,13 @@ dependencies = [ [[package]] name = "fd-lock" -version = "3.0.10" +version = "3.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ef1a30ae415c3a691a4f41afddc2dbcd6d70baf338368d85ebc1e8ed92cedb9" +checksum = "9799aefb4a2e4a01cc47610b1dd47c18ab13d991f27bbcaed9296f5a53d5cbad" dependencies = [ "cfg-if", "rustix", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -1136,9 +1182,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.6" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -1314,9 +1360,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.54" +version = "0.1.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c17cc76786e99f8d2f055c11159e7f0091c42474dcc3189fbab96072e873e6d" +checksum = "716f12fbcfac6ffab0a5e9ec51d0a0ff70503742bb2dc7b99396394c9dc323f0" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1348,9 +1394,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.2" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", @@ -1379,14 +1425,14 @@ checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" dependencies = [ "hermit-abi 0.3.1", "libc", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] name = "ipnet" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" +checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" [[package]] name = "itertools" @@ -1524,9 +1570,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.1.4" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" +checksum = "cd550e73688e6d578f0ac2119e32b797a327631a42f9433e59d02e139c8df60d" [[package]] name = "lock_api" @@ -1626,7 +1672,7 @@ dependencies = [ "libc", "log", "wasi", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -1804,9 +1850,9 @@ checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.2.16", "smallvec", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -1816,12 +1862,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "321a15f8332645759f29875b07f8233d16ed8ec1b3582223de81625a9f8506b7" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", + "arrow-array 36.0.0", + "arrow-buffer 36.0.0", "arrow-cast", - "arrow-data", + "arrow-data 36.0.0", "arrow-ipc", - "arrow-schema", + "arrow-schema 36.0.0", "arrow-select", "base64", "brotli", @@ -1967,9 +2013,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.53" +version = "1.0.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba466839c78239c09faf015484e5cc04860f88242cff4d03eb038f04b4699b73" +checksum = "e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534" dependencies = [ "unicode-ident", ] @@ -2042,6 +2088,15 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags", +] + [[package]] name = "redox_users" version = "0.4.3" @@ -2049,15 +2104,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ "getrandom", - "redox_syscall", + "redox_syscall 0.2.16", "thiserror", ] [[package]] name = "regex" -version = "1.7.2" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cce168fea28d3e05f158bda4576cf0c844d5045bc2cc3620fa0292ed5bb5814c" +checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" dependencies = [ "aho-corasick", "memchr", @@ -2072,9 +2127,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "reqwest" -version = "0.11.15" +version = "0.11.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ba30cc2c0cd02af1222ed216ba659cdb2f879dfe3181852fe7c50b1d0005949" +checksum = "27b71749df584b7f4cac2c426c127a7c785a5106cc98f7a8feb044115f0fa254" dependencies = [ "base64", "bytes", @@ -2137,16 +2192,16 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.11" +version = "0.37.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4165c9963ab29e422d6c26fbc1d37f15bace6b2810221f9d925023480fcf0e" +checksum = "0e78cc525325c06b4a7ff02db283472f3c042b7ff0c391f96c6d5ac6f4f91b75" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -2250,29 +2305,29 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.158" +version = "1.0.159" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "771d4d9c4163ee138805e12c710dd365e4f44be8be0503cb1bb9eb989425d9c9" +checksum = "3c04e8343c3daeec41f58990b9d77068df31209f2af111e059e9fe9646693065" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.158" +version = "1.0.159" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e801c1712f48475582b7696ac71e0ca34ebb30e09338425384269d9717c62cad" +checksum = "4c614d17805b093df4b147b51339e7e44bf05ef59fba1e45d83500bcfb4d8585" dependencies = [ "proc-macro2", "quote", - "syn 2.0.8", + "syn 2.0.11", ] [[package]] name = "serde_json" -version = "1.0.94" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c533a59c9d8a93a09c6ab31f0fd5e5f4dd1b8fc9434804029839884765d04ea" +checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" dependencies = [ "itoa", "ryu", @@ -2444,9 +2499,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.8" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc02725fd69ab9f26eab07fad303e2497fad6fb9eba4f96c4d1687bdf704ad9" +checksum = "21e3787bb71465627110e7d87ed4faaa36c1f61042ee67badb9e2ef173accc40" dependencies = [ "proc-macro2", "quote", @@ -2455,15 +2510,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95" +checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" dependencies = [ "cfg-if", "fastrand", - "redox_syscall", + "redox_syscall 0.3.5", "rustix", - "windows-sys 0.42.0", + "windows-sys", ] [[package]] @@ -2498,7 +2553,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.8", + "syn 2.0.11", ] [[package]] @@ -2538,32 +2593,31 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.26.0" +version = "1.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03201d01c3c27a29c8a5cee5b55a93ddae1ccf6f08f65365c2c918f8c1b76f64" +checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" dependencies = [ "autocfg", "bytes", "libc", - "memchr", "mio", "num_cpus", "parking_lot", "pin-project-lite", "socket2", "tokio-macros", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] name = "tokio-macros" -version = "1.8.2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" +checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.11", ] [[package]] @@ -2900,50 +2954,50 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows" -version = "0.46.0" +version = "0.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdacb41e6a96a052c6cb63a144f24900236121c6f63f4f8219fef5977ecb0c25" +checksum = "2649ff315bee4c98757f15dac226efe3d81927adbb6e882084bb1ee3e0c330a7" dependencies = [ - "windows-targets", + "windows-targets 0.47.0", ] [[package]] name = "windows-sys" -version = "0.42.0" +version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows-targets 0.42.2", ] [[package]] -name = "windows-sys" -version = "0.45.0" +name = "windows-targets" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows-targets", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", ] [[package]] name = "windows-targets" -version = "0.42.2" +version = "0.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +checksum = "2f8996d3f43b4b2d44327cd71b7b0efd1284ab60e6e9d0e8b630e18555d87d3e" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.47.0", + "windows_aarch64_msvc 0.47.0", + "windows_i686_gnu 0.47.0", + "windows_i686_msvc 0.47.0", + "windows_x86_64_gnu 0.47.0", + "windows_x86_64_gnullvm 0.47.0", + "windows_x86_64_msvc 0.47.0", ] [[package]] @@ -2952,42 +3006,84 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831d567d53d4f3cb1db332b68e6e2b6260228eb4d99a777d8b2e8ed794027c90" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +[[package]] +name = "windows_aarch64_msvc" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a42d54a417c60ce4f0e31661eed628f0fa5aca73448c093ec4d45fab4c51cdf" + [[package]] name = "windows_i686_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +[[package]] +name = "windows_i686_gnu" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1925beafdbb22201a53a483db861a5644123157c1c3cee83323a2ed565d71e3" + [[package]] name = "windows_i686_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +[[package]] +name = "windows_i686_msvc" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8ef8f2f1711b223947d9b69b596cf5a4e452c930fb58b6fc3fdae7d0ec6b31" + [[package]] name = "windows_x86_64_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +[[package]] +name = "windows_x86_64_gnu" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7acaa0c2cf0d2ef99b61c308a0c3dbae430a51b7345dedec470bd8f53f5a3642" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a0628f71be1d11e17ca4a0e9e15b3a5180f6fbf1c2d55e3ba3f850378052c1" + [[package]] name = "windows_x86_64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +[[package]] +name = "windows_x86_64_msvc" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d6e62c256dc6d40b8c8707df17df8d774e60e39db723675241e7c15e910bce7" + [[package]] name = "winreg" version = "0.10.1" diff --git a/datafusion/common/src/cast.rs b/datafusion/common/src/cast.rs index fe909e775815..b9cc429d7023 100644 --- a/datafusion/common/src/cast.rs +++ b/datafusion/common/src/cast.rs @@ -26,7 +26,8 @@ use arrow::{ Array, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array, DictionaryArray, FixedSizeBinaryArray, FixedSizeListArray, Float32Array, Float64Array, GenericBinaryArray, GenericListArray, GenericStringArray, - Int32Array, Int64Array, LargeListArray, ListArray, MapArray, NullArray, + Int32Array, Int64Array, IntervalDayTimeArray, IntervalMonthDayNanoArray, + IntervalYearMonthArray, LargeListArray, ListArray, MapArray, NullArray, OffsetSizeTrait, PrimitiveArray, StringArray, StructArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt32Array, UInt64Array, UnionArray, @@ -168,6 +169,27 @@ pub fn as_timestamp_second_array(array: &dyn Array) -> Result<&TimestampSecondAr Ok(downcast_value!(array, TimestampSecondArray)) } +// Downcast ArrayRef to IntervalYearMonthArray +pub fn as_interval_ym_array( + array: &dyn Array, +) -> Result<&IntervalYearMonthArray, DataFusionError> { + Ok(downcast_value!(array, IntervalYearMonthArray)) +} + +// Downcast ArrayRef to IntervalDayTimeArray +pub fn as_interval_dt_array( + array: &dyn Array, +) -> Result<&IntervalDayTimeArray, DataFusionError> { + Ok(downcast_value!(array, IntervalDayTimeArray)) +} + +// Downcast ArrayRef to IntervalMonthDayNanoArray +pub fn as_interval_mdn_array( + array: &dyn Array, +) -> Result<&IntervalMonthDayNanoArray, DataFusionError> { + Ok(downcast_value!(array, IntervalMonthDayNanoArray)) +} + // Downcast ArrayRef to BinaryArray pub fn as_binary_array(array: &dyn Array) -> Result<&BinaryArray> { Ok(downcast_value!(array, BinaryArray)) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index d1c9107a7a73..8b55b0a79bf6 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -44,12 +44,14 @@ use arrow::{ }, }; use arrow_array::timezone::Tz; -use chrono::{DateTime, Datelike, Duration, NaiveDate, NaiveDateTime, TimeZone}; +use chrono::{Datelike, Duration, NaiveDate, NaiveDateTime}; // Constants we use throughout this file: const MILLISECS_IN_ONE_DAY: i64 = 86_400_000; const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000; +const SECS_IN_ONE_MONTH: i64 = 2_592_000; // assuming 30 days. const MILLISECS_IN_ONE_MONTH: i64 = 2_592_000_000; // assuming 30 days. +const MICROSECS_IN_ONE_MONTH: i64 = 2_592_000_000_000; // assuming 30 days. const NANOSECS_IN_ONE_MONTH: i128 = 2_592_000_000_000_000; // assuming 30 days. /// Represents a dynamically typed, nullable single value. @@ -378,41 +380,111 @@ impl PartialOrd for ScalarValue { /// This function computes the duration (in milliseconds) of the given /// year-month-interval. #[inline] -fn ym_to_milli(val: &Option) -> Option { +pub fn ym_to_sec(val: &Option) -> Option { + val.map(|value| (value as i64) * SECS_IN_ONE_MONTH) +} + +/// This function computes the duration (in milliseconds) of the given +/// year-month-interval. +#[inline] +pub fn ym_to_milli(val: &Option) -> Option { val.map(|value| (value as i64) * MILLISECS_IN_ONE_MONTH) } +/// This function computes the duration (in milliseconds) of the given +/// year-month-interval. +#[inline] +pub fn ym_to_micro(val: &Option) -> Option { + val.map(|value| (value as i64) * MICROSECS_IN_ONE_MONTH) +} + /// This function computes the duration (in nanoseconds) of the given /// year-month-interval. #[inline] -fn ym_to_nano(val: &Option) -> Option { +pub fn ym_to_nano(val: &Option) -> Option { val.map(|value| (value as i128) * NANOSECS_IN_ONE_MONTH) } +/// This function computes the duration (in seconds) of the given +/// daytime-interval. +#[inline] +pub fn dt_to_sec(val: &Option) -> Option { + val.map(|val| { + let (days, millis) = IntervalDayTimeType::to_parts(val); + (days as i64) * MILLISECS_IN_ONE_DAY + (millis as i64 / 1_000) + }) +} + /// This function computes the duration (in milliseconds) of the given /// daytime-interval. #[inline] -fn dt_to_milli(val: &Option) -> Option { +pub fn dt_to_milli(val: &Option) -> Option { val.map(|val| { let (days, millis) = IntervalDayTimeType::to_parts(val); (days as i64) * MILLISECS_IN_ONE_DAY + (millis as i64) }) } +/// This function computes the duration (in microseconds) of the given +/// daytime-interval. +#[inline] +pub fn dt_to_micro(val: &Option) -> Option { + val.map(|val| { + let (days, millis) = IntervalDayTimeType::to_parts(val); + (days as i128) * (NANOSECS_IN_ONE_DAY as i128) + (millis as i128) * 1_000 + }) +} + /// This function computes the duration (in nanoseconds) of the given /// daytime-interval. #[inline] -fn dt_to_nano(val: &Option) -> Option { +pub fn dt_to_nano(val: &Option) -> Option { val.map(|val| { let (days, millis) = IntervalDayTimeType::to_parts(val); (days as i128) * (NANOSECS_IN_ONE_DAY as i128) + (millis as i128) * 1_000_000 }) } +/// This function computes the duration (in seconds) of the given +/// month-day-nano-interval. Assumes a month is 30 days long. +#[inline] +pub fn mdn_to_sec(val: &Option) -> Option { + val.map(|val| { + let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(val); + (months as i128) * NANOSECS_IN_ONE_MONTH + + (days as i128) * (NANOSECS_IN_ONE_DAY as i128) + + (nanos as i128) / 1_000_000_000 + }) +} + +/// This function computes the duration (in milliseconds) of the given +/// month-day-nano-interval. Assumes a month is 30 days long. +#[inline] +pub fn mdn_to_milli(val: &Option) -> Option { + val.map(|val| { + let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(val); + (months as i128) * NANOSECS_IN_ONE_MONTH + + (days as i128) * (NANOSECS_IN_ONE_DAY as i128) + + (nanos as i128) / 1_000_000 + }) +} + +/// This function computes the duration (in microseconds) of the given +/// month-day-nano-interval. Assumes a month is 30 days long. +#[inline] +pub fn mdn_to_micro(val: &Option) -> Option { + val.map(|val| { + let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(val); + (months as i128) * NANOSECS_IN_ONE_MONTH + + (days as i128) * (NANOSECS_IN_ONE_DAY as i128) + + (nanos as i128) / 1_000 + }) +} + /// This function computes the duration (in nanoseconds) of the given /// month-day-nano-interval. Assumes a month is 30 days long. #[inline] -fn mdn_to_nano(val: &Option) -> Option { +pub fn mdn_to_nano(val: &Option) -> Option { val.map(|val| { let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(val); (months as i128) * NANOSECS_IN_ONE_MONTH @@ -567,23 +639,21 @@ macro_rules! impl_op { "Overflow while converting seconds to milliseconds".to_string(), ) }; - ts_sub_to_interval( + ts_sub_to_interval::( ts_lhs.checked_mul(1_000).ok_or_else(err)?, ts_rhs.checked_mul(1_000).ok_or_else(err)?, &tz_lhs, &tz_rhs, - IntervalMode::Milli, ) }, ( ScalarValue::TimestampMillisecond(Some(ts_lhs), tz_lhs), ScalarValue::TimestampMillisecond(Some(ts_rhs), tz_rhs), - ) => ts_sub_to_interval( + ) => ts_sub_to_interval::( *ts_lhs, *ts_rhs, tz_lhs, tz_rhs, - IntervalMode::Milli, ), ( ScalarValue::TimestampMicrosecond(Some(ts_lhs), tz_lhs), @@ -594,23 +664,21 @@ macro_rules! impl_op { "Overflow while converting microseconds to nanoseconds".to_string(), ) }; - ts_sub_to_interval( + ts_sub_to_interval::( ts_lhs.checked_mul(1_000).ok_or_else(err)?, ts_rhs.checked_mul(1_000).ok_or_else(err)?, tz_lhs, tz_rhs, - IntervalMode::Nano, ) }, ( ScalarValue::TimestampNanosecond(Some(ts_lhs), tz_lhs), ScalarValue::TimestampNanosecond(Some(ts_rhs), tz_rhs), - ) => ts_sub_to_interval( + ) => ts_sub_to_interval::( *ts_lhs, *ts_rhs, tz_lhs, tz_rhs, - IntervalMode::Nano, ), _ => impl_op_arithmetic!($LHS, $RHS, -) } @@ -660,37 +728,27 @@ macro_rules! impl_op_arithmetic { ( ScalarValue::IntervalYearMonth(Some(lhs)), ScalarValue::IntervalYearMonth(Some(rhs)), - ) => Ok(ScalarValue::new_interval_ym( - 0, - lhs + rhs * get_sign!($OPERATION), - )), + ) => Ok(ScalarValue::IntervalYearMonth(Some(op_ym( + *lhs, + *rhs, + get_sign!($OPERATION), + )))), ( ScalarValue::IntervalDayTime(Some(lhs)), ScalarValue::IntervalDayTime(Some(rhs)), - ) => { - let sign = get_sign!($OPERATION); - let (lhs_days, lhs_millis) = IntervalDayTimeType::to_parts(*lhs); - let (rhs_days, rhs_millis) = IntervalDayTimeType::to_parts(*rhs); - Ok(ScalarValue::new_interval_dt( - lhs_days + rhs_days * sign, - lhs_millis + rhs_millis * sign, - )) - } + ) => Ok(ScalarValue::IntervalDayTime(Some(op_dt( + *lhs, + *rhs, + get_sign!($OPERATION), + )))), ( ScalarValue::IntervalMonthDayNano(Some(lhs)), ScalarValue::IntervalMonthDayNano(Some(rhs)), - ) => { - let sign = get_sign!($OPERATION); - let (lhs_months, lhs_days, lhs_nanos) = - IntervalMonthDayNanoType::to_parts(*lhs); - let (rhs_months, rhs_days, rhs_nanos) = - IntervalMonthDayNanoType::to_parts(*rhs); - Ok(ScalarValue::new_interval_mdn( - lhs_months + rhs_months * sign, - lhs_days + rhs_days * sign, - lhs_nanos + rhs_nanos * (sign as i64), - )) - } + ) => Ok(ScalarValue::IntervalMonthDayNano(Some(op_mdn( + *lhs, + *rhs, + get_sign!($OPERATION), + )))), // Binary operations on arguments with different types: (ScalarValue::Date32(Some(days)), _) => { let value = date32_add(*days, $RHS, get_sign!($OPERATION))?; @@ -735,27 +793,57 @@ macro_rules! impl_op_arithmetic { ( ScalarValue::IntervalYearMonth(Some(lhs)), ScalarValue::IntervalDayTime(Some(rhs)), - ) => op_ym_dt(*lhs, *rhs, get_sign!($OPERATION), false), + ) => Ok(ScalarValue::IntervalMonthDayNano(Some(op_ym_dt( + *lhs, + *rhs, + get_sign!($OPERATION), + false, + )))), ( ScalarValue::IntervalYearMonth(Some(lhs)), ScalarValue::IntervalMonthDayNano(Some(rhs)), - ) => op_ym_mdn(*lhs, *rhs, get_sign!($OPERATION), false), + ) => Ok(ScalarValue::IntervalMonthDayNano(Some(op_ym_mdn( + *lhs, + *rhs, + get_sign!($OPERATION), + false, + )))), ( ScalarValue::IntervalDayTime(Some(lhs)), ScalarValue::IntervalYearMonth(Some(rhs)), - ) => op_ym_dt(*rhs, *lhs, get_sign!($OPERATION), true), + ) => Ok(ScalarValue::IntervalMonthDayNano(Some(op_ym_dt( + *rhs, + *lhs, + get_sign!($OPERATION), + true, + )))), ( ScalarValue::IntervalDayTime(Some(lhs)), ScalarValue::IntervalMonthDayNano(Some(rhs)), - ) => op_dt_mdn(*lhs, *rhs, get_sign!($OPERATION), false), + ) => Ok(ScalarValue::IntervalMonthDayNano(Some(op_dt_mdn( + *lhs, + *rhs, + get_sign!($OPERATION), + false, + )))), ( ScalarValue::IntervalMonthDayNano(Some(lhs)), ScalarValue::IntervalYearMonth(Some(rhs)), - ) => op_ym_mdn(*rhs, *lhs, get_sign!($OPERATION), true), + ) => Ok(ScalarValue::IntervalMonthDayNano(Some(op_ym_mdn( + *rhs, + *lhs, + get_sign!($OPERATION), + true, + )))), ( ScalarValue::IntervalMonthDayNano(Some(lhs)), ScalarValue::IntervalDayTime(Some(rhs)), - ) => op_dt_mdn(*rhs, *lhs, get_sign!($OPERATION), true), + ) => Ok(ScalarValue::IntervalMonthDayNano(Some(op_dt_mdn( + *rhs, + *lhs, + get_sign!($OPERATION), + true, + )))), _ => Err(DataFusionError::Internal(format!( "Operator {} is not implemented for types {:?} and {:?}", stringify!($OPERATION), @@ -769,10 +857,10 @@ macro_rules! impl_op_arithmetic { /// This function adds/subtracts two "raw" intervals (`lhs` and `rhs`) of different /// types ([`IntervalYearMonthType`] and [`IntervalDayTimeType`], respectively). /// The argument `sign` chooses between addition and subtraction, the argument -/// `commute` swaps `lhs` and `rhs`. The return value is an interval [`ScalarValue`] -/// with type data type [`IntervalMonthDayNanoType`]. +/// `commute` swaps `lhs` and `rhs`. The return value is an 128-bit integer. +/// It can be involved in a [`IntervalMonthDayNanoType`] in the outer scope. #[inline] -fn op_ym_dt(mut lhs: i32, rhs: i64, sign: i32, commute: bool) -> Result { +pub fn op_ym_dt(mut lhs: i32, rhs: i64, sign: i32, commute: bool) -> i128 { let (mut days, millis) = IntervalDayTimeType::to_parts(rhs); let mut nanos = (millis as i64) * 1_000_000; if commute { @@ -781,16 +869,16 @@ fn op_ym_dt(mut lhs: i32, rhs: i64, sign: i32, commute: bool) -> Result Result { +pub fn op_ym_mdn(lhs: i32, rhs: i128, sign: i32, commute: bool) -> i128 { let (mut months, mut days, mut nanos) = IntervalMonthDayNanoType::to_parts(rhs); if commute { months += lhs * sign; @@ -799,20 +887,19 @@ fn op_ym_mdn(lhs: i32, rhs: i128, sign: i32, commute: bool) -> Result Result { +pub fn op_dt_mdn(lhs: i64, rhs: i128, sign: i32, commute: bool) -> i128 { let (lhs_days, lhs_millis) = IntervalDayTimeType::to_parts(lhs); let (rhs_months, rhs_days, rhs_nanos) = IntervalMonthDayNanoType::to_parts(rhs); - - let result = if commute { + if commute { IntervalMonthDayNanoType::make_value( rhs_months, lhs_days * sign + rhs_days, @@ -824,8 +911,45 @@ fn op_dt_mdn(lhs: i64, rhs: i128, sign: i32, commute: bool) -> Result i32 { + lhs + rhs * sign +} + +/// This function adds/subtracts two "raw" intervals (`lhs` and `rhs`) of +/// the same type [`IntervalDayTimeType`]. The argument `sign` chooses between +/// addition and subtraction. The return value is an 64-bit integer. It can be +/// involved in a [`IntervalDayTimeType`] in the outer scope. +#[inline] +pub fn op_dt(lhs: i64, rhs: i64, sign: i32) -> i64 { + let (lhs_days, lhs_millis) = IntervalDayTimeType::to_parts(lhs); + let (rhs_days, rhs_millis) = IntervalDayTimeType::to_parts(rhs); + IntervalDayTimeType::make_value( + lhs_days + rhs_days * sign, + lhs_millis + rhs_millis * sign, + ) +} + +/// This function adds/subtracts two "raw" intervals (`lhs` and `rhs`) of +/// the same type [`IntervalMonthDayNanoType`]. The argument `sign` chooses between +/// addition and subtraction. The return value is an 128-bit integer. It can be +/// involved in a [`IntervalMonthDayNanoType`] in the outer scope. +#[inline] +pub fn op_mdn(lhs: i128, rhs: i128, sign: i32) -> i128 { + let (lhs_months, lhs_days, lhs_nanos) = IntervalMonthDayNanoType::to_parts(lhs); + let (rhs_months, rhs_days, rhs_nanos) = IntervalMonthDayNanoType::to_parts(rhs); + IntervalMonthDayNanoType::make_value( + lhs_months + rhs_months * sign, + lhs_days + rhs_days * sign, + lhs_nanos + rhs_nanos * (sign as i64), + ) } macro_rules! get_sign { @@ -837,40 +961,42 @@ macro_rules! get_sign { }; } -#[derive(Clone, Copy)] -enum IntervalMode { - Milli, - Nano, -} +pub const YM_MODE: i8 = 0; +pub const DT_MODE: i8 = 1; +pub const MDN_MODE: i8 = 2; +pub const MILLISECOND_MODE: bool = false; +pub const NANOSECOND_MODE: bool = true; /// This function computes subtracts `rhs_ts` from `lhs_ts`, taking timezones /// into account when given. Units of the resulting interval is specified by -/// the argument `mode`. +/// the constant `TIME_MODE`. /// The default behavior of Datafusion is the following: /// - When subtracting timestamps at seconds/milliseconds precision, the output /// interval will have the type [`IntervalDayTimeType`]. /// - When subtracting timestamps at microseconds/nanoseconds precision, the /// output interval will have the type [`IntervalMonthDayNanoType`]. -fn ts_sub_to_interval( +fn ts_sub_to_interval( lhs_ts: i64, rhs_ts: i64, lhs_tz: &Option, rhs_tz: &Option, - mode: IntervalMode, ) -> Result { - let lhs_dt = with_timezone_to_naive_datetime(lhs_ts, lhs_tz, mode)?; - let rhs_dt = with_timezone_to_naive_datetime(rhs_ts, rhs_tz, mode)?; - let delta_secs = lhs_dt.signed_duration_since(rhs_dt); + let parsed_lhs_tz = parse_timezones(lhs_tz)?; + let parsed_rhs_tz = parse_timezones(rhs_tz)?; + + let (naive_lhs, naive_rhs) = + calculate_naives::(lhs_ts, parsed_lhs_tz, rhs_ts, parsed_rhs_tz)?; + let delta_secs = naive_lhs.signed_duration_since(naive_rhs); - match mode { - IntervalMode::Milli => { + match TIME_MODE { + MILLISECOND_MODE => { let as_millisecs = delta_secs.num_milliseconds(); Ok(ScalarValue::new_interval_dt( (as_millisecs / MILLISECS_IN_ONE_DAY) as i32, (as_millisecs % MILLISECS_IN_ONE_DAY) as i32, )) } - IntervalMode::Nano => { + NANOSECOND_MODE => { let as_nanosecs = delta_secs.num_nanoseconds().ok_or_else(|| { DataFusionError::Execution(String::from( "Can not compute timestamp differences with nanosecond precision", @@ -885,50 +1011,83 @@ fn ts_sub_to_interval( } } -/// This function creates the [`NaiveDateTime`] object corresponding to the -/// given timestamp using the units (tick size) implied by argument `mode`. -#[inline] -fn with_timezone_to_naive_datetime( - ts: i64, - tz: &Option, - mode: IntervalMode, -) -> Result { - let datetime = if let IntervalMode::Milli = mode { - ticks_to_naive_datetime::<1_000_000>(ts) - } else { - ticks_to_naive_datetime::<1>(ts) - }?; - +/// This function parses the timezone from string to Tz. +/// If it cannot parse or timezone field is [`None`], it returns [`None`]. +pub fn parse_timezones(tz: &Option) -> Result> { if let Some(tz) = tz { let parsed_tz: Tz = FromStr::from_str(tz).map_err(|_| { DataFusionError::Execution("cannot parse given timezone".to_string()) })?; - let offset = parsed_tz - .offset_from_local_datetime(&datetime) - .single() - .ok_or_else(|| { - DataFusionError::Execution( - "error conversion result of timezone offset".to_string(), - ) - })?; - return Ok(DateTime::::from_local(datetime, offset).naive_utc()); + Ok(Some(parsed_tz)) + } else { + Ok(None) } - Ok(datetime) } -/// This function creates the [`NaiveDateTime`] object corresponding to the -/// given timestamp, whose tick size is specified by `UNIT_NANOS`. -#[inline] -fn ticks_to_naive_datetime(ticks: i64) -> Result { - NaiveDateTime::from_timestamp_opt( - (ticks * UNIT_NANOS) / 1_000_000_000, - ((ticks * UNIT_NANOS) % 1_000_000_000) as u32, - ) - .ok_or_else(|| { - DataFusionError::Execution( - "Can not convert given timestamp to a NaiveDateTime".to_string(), - ) - }) +/// This function takes two timestamps with an optional timezone, +/// and returns the duration between them. If one of the timestamps +/// has a [`None`] timezone, the other one is also treated as having [`None`]. +pub fn calculate_naives( + lhs_ts: i64, + parsed_lhs_tz: Option, + rhs_ts: i64, + parsed_rhs_tz: Option, +) -> Result<(NaiveDateTime, NaiveDateTime)> { + let err = || { + DataFusionError::Execution(String::from( + "error while converting Int64 to DateTime in timestamp subtraction", + )) + }; + match (parsed_lhs_tz, parsed_rhs_tz, TIME_MODE) { + (Some(lhs_tz), Some(rhs_tz), MILLISECOND_MODE) => { + let lhs = arrow_array::temporal_conversions::as_datetime_with_timezone::< + arrow_array::types::TimestampMillisecondType, + >(lhs_ts, rhs_tz) + .ok_or_else(err)? + .naive_local(); + let rhs = arrow_array::temporal_conversions::as_datetime_with_timezone::< + arrow_array::types::TimestampMillisecondType, + >(rhs_ts, lhs_tz) + .ok_or_else(err)? + .naive_local(); + Ok((lhs, rhs)) + } + (Some(lhs_tz), Some(rhs_tz), NANOSECOND_MODE) => { + let lhs = arrow_array::temporal_conversions::as_datetime_with_timezone::< + arrow_array::types::TimestampNanosecondType, + >(lhs_ts, rhs_tz) + .ok_or_else(err)? + .naive_local(); + let rhs = arrow_array::temporal_conversions::as_datetime_with_timezone::< + arrow_array::types::TimestampNanosecondType, + >(rhs_ts, lhs_tz) + .ok_or_else(err)? + .naive_local(); + Ok((lhs, rhs)) + } + (_, _, MILLISECOND_MODE) => { + let lhs = arrow_array::temporal_conversions::as_datetime::< + arrow_array::types::TimestampMillisecondType, + >(lhs_ts) + .ok_or_else(err)?; + let rhs = arrow_array::temporal_conversions::as_datetime::< + arrow_array::types::TimestampMillisecondType, + >(rhs_ts) + .ok_or_else(err)?; + Ok((lhs, rhs)) + } + (_, _, NANOSECOND_MODE) => { + let lhs = arrow_array::temporal_conversions::as_datetime::< + arrow_array::types::TimestampNanosecondType, + >(lhs_ts) + .ok_or_else(err)?; + let rhs = arrow_array::temporal_conversions::as_datetime::< + arrow_array::types::TimestampNanosecondType, + >(rhs_ts) + .ok_or_else(err)?; + Ok((lhs, rhs)) + } + } } #[inline] @@ -950,6 +1109,16 @@ pub fn seconds_add(ts_s: i64, scalar: &ScalarValue, sign: i32) -> Result { do_date_time_math(ts_s, 0, scalar, sign).map(|dt| dt.timestamp()) } +#[inline] +pub fn seconds_add_array( + ts_s: i64, + interval: i128, + sign: i32, +) -> Result { + do_date_time_math_array::(ts_s, 0, interval, sign) + .map(|dt| dt.timestamp()) +} + #[inline] pub fn milliseconds_add(ts_ms: i64, scalar: &ScalarValue, sign: i32) -> Result { let secs = ts_ms / 1000; @@ -957,6 +1126,22 @@ pub fn milliseconds_add(ts_ms: i64, scalar: &ScalarValue, sign: i32) -> Result( + ts_ms: i64, + interval: i128, + sign: i32, +) -> Result { + let mut secs = ts_ms / 1000; + let mut nsecs = ((ts_ms % 1000) * 1_000_000) as i32; + if nsecs < 0 { + secs -= 1; + nsecs += 1_000_000_000; + } + do_date_time_math_array::(secs, nsecs as u32, interval, sign) + .map(|dt| dt.timestamp_millis()) +} + #[inline] pub fn microseconds_add(ts_us: i64, scalar: &ScalarValue, sign: i32) -> Result { let secs = ts_us / 1_000_000; @@ -964,6 +1149,22 @@ pub fn microseconds_add(ts_us: i64, scalar: &ScalarValue, sign: i32) -> Result( + ts_us: i64, + interval: i128, + sign: i32, +) -> Result { + let mut secs = ts_us / 1_000_000; + let mut nsecs = ((ts_us % 1_000_000) * 1000) as i32; + if nsecs < 0 { + secs -= 1; + nsecs += 1_000_000_000; + } + do_date_time_math_array::(secs, nsecs as u32, interval, sign) + .map(|dt| dt.timestamp_nanos() / 1000) +} + #[inline] pub fn nanoseconds_add(ts_ns: i64, scalar: &ScalarValue, sign: i32) -> Result { let secs = ts_ns / 1_000_000_000; @@ -971,6 +1172,51 @@ pub fn nanoseconds_add(ts_ns: i64, scalar: &ScalarValue, sign: i32) -> Result( + ts_ns: i64, + interval: i128, + sign: i32, +) -> Result { + let mut secs = ts_ns / 1_000_000_000; + let mut nsecs = (ts_ns % 1_000_000_000) as i32; + if nsecs < 0 { + secs -= 1; + nsecs += 1_000_000_000; + } + do_date_time_math_array::(secs, nsecs as u32, interval, sign) + .map(|dt| dt.timestamp_nanos()) +} + +#[inline] +pub fn seconds_sub(ts_lhs: i64, ts_rhs: i64) -> i64 { + let diff_ms = (ts_lhs - ts_rhs) * 1000; + let days = (diff_ms / MILLISECS_IN_ONE_DAY) as i32; + let millis = (diff_ms % MILLISECS_IN_ONE_DAY) as i32; + IntervalDayTimeType::make_value(days, millis) +} +#[inline] +pub fn milliseconds_sub(ts_lhs: i64, ts_rhs: i64) -> i64 { + let diff_ms = ts_lhs - ts_rhs; + let days = (diff_ms / MILLISECS_IN_ONE_DAY) as i32; + let millis = (diff_ms % MILLISECS_IN_ONE_DAY) as i32; + IntervalDayTimeType::make_value(days, millis) +} +#[inline] +pub fn microseconds_sub(ts_lhs: i64, ts_rhs: i64) -> i128 { + let diff_ns = (ts_lhs - ts_rhs) * 1000; + let days = (diff_ns / NANOSECS_IN_ONE_DAY) as i32; + let nanos = diff_ns % NANOSECS_IN_ONE_DAY; + IntervalMonthDayNanoType::make_value(0, days, nanos) +} +#[inline] +pub fn nanoseconds_sub(ts_lhs: i64, ts_rhs: i64) -> i128 { + let diff_ns = ts_lhs - ts_rhs; + let days = (diff_ns / NANOSECS_IN_ONE_DAY) as i32; + let nanos = diff_ns % NANOSECS_IN_ONE_DAY; + IntervalMonthDayNanoType::make_value(0, days, nanos) +} + #[inline] fn do_date_time_math( secs: i64, @@ -986,6 +1232,21 @@ fn do_date_time_math( do_date_math(prior, scalar, sign) } +#[inline] +fn do_date_time_math_array( + secs: i64, + nsecs: u32, + interval: i128, + sign: i32, +) -> Result { + let prior = NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| { + DataFusionError::Internal(format!( + "Could not conert to NaiveDateTime: secs {secs} nsecs {nsecs}" + )) + })?; + do_date_math_array::<_, INTERVAL_MODE>(prior, interval, sign) +} + fn do_date_math(prior: D, scalar: &ScalarValue, sign: i32) -> Result where D: Datelike + Add, @@ -1000,6 +1261,26 @@ where }) } +fn do_date_math_array( + prior: D, + interval: i128, + sign: i32, +) -> Result +where + D: Datelike + Add, +{ + Ok(match INTERVAL_MODE { + YM_MODE => shift_months(prior, interval as i32 * sign), + DT_MODE => add_day_time(prior, interval as i64, sign), + MDN_MODE => add_m_d_nano(prior, interval, sign), + _ => { + return Err(DataFusionError::Internal( + "Undefined interval mode for interval calculations".to_string(), + )); + } + }) +} + // Can remove once chrono:0.4.23 is released fn add_m_d_nano(prior: D, interval: i128, sign: i32) -> D where @@ -5304,6 +5585,30 @@ mod tests { ), ScalarValue::new_interval_dt(0, 0), ), + // 11th test case, negative results + ( + ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 17) + .unwrap() + .and_hms_milli_opt(4, 10, 0, 0) + .unwrap() + .timestamp_millis(), + ), + None, + ), + ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 17) + .unwrap() + .and_hms_milli_opt(4, 10, 0, 1) + .unwrap() + .timestamp_millis(), + ), + None, + ), + ScalarValue::new_interval_dt(0, -sign), + ), ] } diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 3ac14f1c82ba..00918638e916 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -1331,6 +1331,54 @@ where Ok(Arc::new(table)) } +fn make_timestamp_tz_sub_table( + tz1: Option, + tz2: Option, +) -> Result> +where + A: ArrowTimestampType, +{ + let schema = Arc::new(Schema::new(vec![ + Field::new("ts1", DataType::Timestamp(A::UNIT, tz1.clone()), false), + Field::new("ts2", DataType::Timestamp(A::UNIT, tz2.clone()), false), + Field::new("val", DataType::Int32, true), + ])); + + let divisor = match A::UNIT { + TimeUnit::Nanosecond => 1, + TimeUnit::Microsecond => 1000, + TimeUnit::Millisecond => 1_000_000, + TimeUnit::Second => 1_000_000_000, + }; + + let timestamps1 = vec![ + 1_678_892_420_000_000_000i64 / divisor, //2023-03-15T15:00:20.000_000_000 + 1_678_892_410_000_000_000i64 / divisor, //2023-03-15T15:00:10.000_000_000 + 1_678_892_430_000_000_000i64 / divisor, //2023-03-15T15:00:30.000_000_000 + ]; + let timestamps2 = vec![ + 1_678_892_400_000_000_000i64 / divisor, //2023-03-15T15:00:00.000_000_000 + 1_678_892_400_000_000_000i64 / divisor, //2023-03-15T15:00:00.000_000_000 + 1_678_892_400_000_000_000i64 / divisor, //2023-03-15T15:00:00.000_000_000 + ]; + + let array1 = + PrimitiveArray::::from_iter_values(timestamps1).with_timezone_opt(tz1); + let array2 = + PrimitiveArray::::from_iter_values(timestamps2).with_timezone_opt(tz2); + + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(array1), + Arc::new(array2), + Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])), + ], + )?; + let table = MemTable::try_new(schema, vec![vec![data]])?; + Ok(Arc::new(table)) +} + fn make_timestamp_nano_table() -> Result> { make_timestamp_table::() } diff --git a/datafusion/core/tests/sql/set_variable.rs b/datafusion/core/tests/sql/set_variable.rs index f0092b3e50f7..b89264ebbab9 100644 --- a/datafusion/core/tests/sql/set_variable.rs +++ b/datafusion/core/tests/sql/set_variable.rs @@ -458,15 +458,8 @@ async fn set_time_zone_bad_time_zone_format() { plan_and_collect(&ctx, "SELECT '2000-01-01T00:00:00'::TIMESTAMP::TIMESTAMPTZ") .await .unwrap(); - - let expected = vec![ - "+-----------------------------+", - "| Utf8(\"2000-01-01T00:00:00\") |", - "+-----------------------------+", - "| 2000-01-01T08:00:00+08:00 |", - "+-----------------------------+", - ]; - assert_batches_eq!(expected, &result); + let batch_pretty = pretty_format_batches(&result).unwrap().to_string(); + assert_eq!(batch_pretty, "+-----------------------------+\n| Utf8(\"2000-01-01T00:00:00\") |\n+-----------------------------+\n| 2000-01-01T08:00:00+08:00 |\n+-----------------------------+"); // this is invalid even after we support named time zone plan_and_collect(&ctx, "SET TIME ZONE = 'Asia/Taipei2'") diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index 128ee1639e3f..e3c2ef6dff65 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -1691,3 +1691,29 @@ async fn test_ts_dt_binary_ops() -> Result<()> { Ok(()) } + +// Cannot remove to sqllogictest, timezone support is not ready there. +#[tokio::test] +async fn timestamp_sub_with_tz() -> Result<()> { + let ctx = SessionContext::new(); + let table_a = make_timestamp_tz_sub_table::( + Some("America/Los_Angeles".to_string()), + Some("Europe/Istanbul".to_string()), + )?; + ctx.register_table("table_a", table_a)?; + + let sql = "SELECT val, ts1 - ts2 AS ts_diff FROM table_a ORDER BY ts2 - ts1"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-----+---------------------------------------------------+", + "| val | ts_diff |", + "+-----+---------------------------------------------------+", + "| 3 | 0 years 0 mons 0 days 10 hours 0 mins 30.000 secs |", + "| 1 | 0 years 0 mons 0 days 10 hours 0 mins 20.000 secs |", + "| 2 | 0 years 0 mons 0 days 10 hours 0 mins 10.000 secs |", + "+-----+---------------------------------------------------+", + ]; + assert_batches_eq!(expected, &actual); + + Ok(()) +} diff --git a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt index c533951e41ed..5f5ea4e6130a 100644 --- a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt +++ b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt @@ -261,6 +261,110 @@ SELECT INTERVAL '8' MONTH + '2000-01-01T00:00:00'::timestamp; ---- 2000-09-01T00:00:00 +# Interval columns are created with timestamp subtraction in subquery since they are not supported yet +statement ok +create table foo (val int, ts1 timestamp, ts2 timestamp) as values +(1, '2023-03-15T15:00:20.000000123'::timestamp, '2023-01-20T23:00:00.000000099'::timestamp), +(2, '2023-02-28T12:01:55.000123456'::timestamp, '2000-02-23T11:00:00.123000001'::timestamp), +(3, '2033-11-02T23:22:13.000123456'::timestamp, '1990-03-01T00:00:00.333000001'::timestamp), +(4, '2003-07-11T01:31:15.000123456'::timestamp, '2045-04-11T15:00:00.000000001'::timestamp); + +# Timestamp - Timestamp +query I? +SELECT val, ts1 - ts2 FROM foo ORDER BY ts2 - ts1; +---- +4 0 years 0 mons -15250 days -13 hours -28 mins -44.999876545 secs +3 0 years 0 mons 15952 days 23 hours 22 mins 12.667123455 secs +2 0 years 0 mons 8406 days 1 hours 1 mins 54.877123455 secs +1 0 years 0 mons 53 days 16 hours 0 mins 20.000000024 secs + +# Interval - Interval +query ? +SELECT subq.interval1 - subq.interval2 +FROM ( + SELECT ts1 - ts2 AS interval1, + ts2 - ts1 AS interval2 + FROM foo +) AS subq; +---- +0 years 0 mons 106 days 32 hours 0 mins 40.000000048 secs +0 years 0 mons 16812 days 2 hours 3 mins 49.754246910 secs +0 years 0 mons 31904 days 46 hours 44 mins 25.334246910 secs +0 years 0 mons -30500 days -26 hours -57 mins -29.999753090 secs + +# Interval + Interval +query ? +SELECT subq.interval1 + subq.interval2 +FROM ( + SELECT ts1 - ts2 AS interval1, + ts2 - ts1 AS interval2 + FROM foo +) AS subq; +---- +0 years 0 mons 0 days 0 hours 0 mins 0.000000000 secs +0 years 0 mons 0 days 0 hours 0 mins 0.000000000 secs +0 years 0 mons 0 days 0 hours 0 mins 0.000000000 secs +0 years 0 mons 0 days 0 hours 0 mins 0.000000000 secs + +# Timestamp - Interval +query P +SELECT subq.ts1 - subq.interval1 +FROM ( + SELECT ts1, + ts1 - ts2 AS interval1 + FROM foo +) AS subq; +---- +2023-01-20T23:00:00.000000099 +2000-02-23T11:00:00.123000001 +1990-03-01T00:00:00.333000001 +2045-04-11T15:00:00.000000001 + +# Interval + Timestamp +query P +SELECT subq.interval1 + subq.ts1 +FROM ( + SELECT ts1, + ts1 - ts2 AS interval1 + FROM foo +) AS subq; +---- +2023-05-08T07:00:40.000000147 +2046-03-05T13:03:49.877246911 +2077-07-07T22:44:25.667246911 +1961-10-08T12:02:30.000246911 + +# Timestamp + Interval +query P +SELECT subq.ts1 + subq.interval1 +FROM ( + SELECT ts1, + ts1 - ts2 AS interval1 + FROM foo +) AS subq; +---- +2023-05-08T07:00:40.000000147 +2046-03-05T13:03:49.877246911 +2077-07-07T22:44:25.667246911 +1961-10-08T12:02:30.000246911 + +# Timestamp + Timestamp => error +statement error DataFusion error: Error during planning: Timestamp\(Nanosecond, None\) Timestamp\(Nanosecond, None\) is an unsupported operation. addition/subtraction on dates/timestamps only supported with interval types +SELECT ts1 + ts2 +FROM foo; + +# Interval - Timestamp => error +statement error DataFusion error: Error during planning: Interval\(MonthDayNano\) - Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to +SELECT subq.interval1 - subq.ts1 +FROM ( + SELECT ts1, + ts1 - ts2 AS interval1 + FROM foo +) AS subq; + +statement ok +drop table foo; + # timestamptz to utf8 conversion query BBBB SELECT diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 5ee66837ec16..9c6eefe9e7c9 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -21,7 +21,7 @@ use crate::type_coercion::{is_date, is_interval, is_numeric, is_timestamp}; use crate::Operator; use arrow::compute::can_cast_types; use arrow::datatypes::{ - DataType, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, + DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, }; use datafusion_common::DataFusionError; use datafusion_common::Result; @@ -113,11 +113,17 @@ pub fn coerce_types( | Operator::Gt | Operator::GtEq | Operator::LtEq => comparison_coercion(lhs_type, rhs_type), + // interval - timestamp is an erroneous case, cannot coerce a type Operator::Plus | Operator::Minus - if is_date(lhs_type) + if (is_date(lhs_type) || is_date(rhs_type) || is_timestamp(lhs_type) - || is_timestamp(rhs_type) => + || is_timestamp(rhs_type) + || is_interval(lhs_type) + || is_interval(rhs_type)) + && (!is_interval(lhs_type) + || !is_timestamp(rhs_type) + || *op != Operator::Minus) => { temporal_add_sub_coercion(lhs_type, rhs_type, op)? } @@ -143,7 +149,7 @@ pub fn coerce_types( match result { None => Err(DataFusionError::Plan( format!( - "'{lhs_type:?} {op} {rhs_type:?}' can't be evaluated because there isn't a common type to coerce the types to" + "{lhs_type:?} {op} {rhs_type:?} can't be evaluated because there isn't a common type to coerce the types to" ), )), Some(t) => Ok(t) @@ -204,33 +210,86 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Result> { - // interval + date or timestamp - if is_interval(lhs_type) && (is_date(rhs_type) || is_timestamp(rhs_type)) { - return Ok(Some(rhs_type.clone())); + match (lhs_type, rhs_type, op) { + // if an interval is being added/subtracted from a date/timestamp, return the date/timestamp data type + (lhs, rhs, _) if is_interval(lhs) && (is_date(rhs) || is_timestamp(rhs)) => { + Ok(Some(rhs.clone())) + } + (lhs, rhs, _) if is_interval(rhs) && (is_date(lhs) || is_timestamp(lhs)) => { + Ok(Some(lhs.clone())) + } + // if two timestamps are being subtracted, check their time units and return the corresponding interval data type + (lhs, rhs, Operator::Minus) if is_timestamp(lhs) && is_timestamp(rhs) => { + handle_timestamp_minus(lhs, rhs) + } + // if two intervals are being added/subtracted, check their interval units and return the corresponding interval data type + (lhs, rhs, _) if is_interval(lhs) && is_interval(rhs) => handle_interval_addition(lhs, rhs), + // if two date/timestamp are being added/subtracted, return an error indicating that the operation is not supported + (lhs, rhs, _) if (is_date(lhs) || is_timestamp(lhs)) && (is_date(rhs) || is_timestamp(rhs)) => { + Err(DataFusionError::Plan(format!( + "{:?} {:?} is an unsupported operation. addition/subtraction on dates/timestamps only supported with interval types", + lhs_type, rhs_type + ))) + } + // return None if no coercion is possible + _ => Ok(None), } +} - // date or timestamp + interval - if is_interval(rhs_type) && (is_date(lhs_type) || is_timestamp(lhs_type)) { - return Ok(Some(lhs_type.clone())); +// This function checks if two interval data types have the same interval unit and returns an interval data type +// representing the sum of them. If the two interval data types have different units, it returns an interval data type +// with "IntervalUnit::MonthDayNano". If the two interval data types are already "IntervalUnit::YearMonth" or "IntervalUnit::DayTime", +// it returns an interval data type with the same unit as the operands. +fn handle_interval_addition(lhs: &DataType, rhs: &DataType) -> Result> { + match (lhs, rhs) { + // operation with the same types + ( + DataType::Interval(IntervalUnit::YearMonth), + DataType::Interval(IntervalUnit::YearMonth), + ) => Ok(Some(DataType::Interval(IntervalUnit::YearMonth))), + ( + DataType::Interval(IntervalUnit::DayTime), + DataType::Interval(IntervalUnit::DayTime), + ) => Ok(Some(DataType::Interval(IntervalUnit::DayTime))), + // operation with MonthDayNano's or different types + (_, _) => Ok(Some(DataType::Interval(IntervalUnit::MonthDayNano))), } +} - // date or timestamp + date or timestamp - if (is_date(lhs_type) || is_timestamp(lhs_type)) - && (is_date(rhs_type) || is_timestamp(rhs_type)) - { - return Err(DataFusionError::Plan( - format!( - "'{lhs_type:?} {op} {rhs_type:?}' is an unsupported operation. \ - addition/subtraction on dates/timestamps only supported with interval types" - ),)); +// This function checks if two timestamp data types have the same time unit and returns an interval data type +// representing the difference between them, either "IntervalUnit::DayTime" if the time unit is second or millisecond, +// or "IntervalUnit::MonthDayNano" if the time unit is microsecond or nanosecond. If the two timestamp data types have +// different time units, it returns an error indicating that "The timestamps have different types". +fn handle_timestamp_minus(lhs: &DataType, rhs: &DataType) -> Result> { + match (lhs, rhs) { + ( + DataType::Timestamp(TimeUnit::Second, _), + DataType::Timestamp(TimeUnit::Second, _), + ) + | ( + DataType::Timestamp(TimeUnit::Millisecond, _), + DataType::Timestamp(TimeUnit::Millisecond, _), + ) => Ok(Some(DataType::Interval(IntervalUnit::DayTime))), + ( + DataType::Timestamp(TimeUnit::Microsecond, _), + DataType::Timestamp(TimeUnit::Microsecond, _), + ) + | ( + DataType::Timestamp(TimeUnit::Nanosecond, _), + DataType::Timestamp(TimeUnit::Nanosecond, _), + ) => Ok(Some(DataType::Interval(IntervalUnit::MonthDayNano))), + (_, _) => Err(DataFusionError::Plan( + "The timestamps have different types".to_string(), + )), } - Ok(None) } /// Returns the output type of applying numeric operations such as `=` @@ -769,7 +828,7 @@ mod tests { coerce_types(&DataType::Float32, &Operator::Plus, &DataType::Utf8); if let Err(DataFusionError::Plan(e)) = result_type { - assert_eq!(e, "'Float32 + Utf8' can't be evaluated because there isn't a common type to coerce the types to"); + assert_eq!(e, "Float32 + Utf8 can't be evaluated because there isn't a common type to coerce the types to"); Ok(()) } else { Err(DataFusionError::Internal( @@ -941,16 +1000,16 @@ mod tests { let err = coerce_types( &DataType::Timestamp(TimeUnit::Nanosecond, None), &Operator::Minus, - &DataType::Timestamp(TimeUnit::Nanosecond, None), + &DataType::Timestamp(TimeUnit::Millisecond, None), ) .unwrap_err() .to_string(); - assert_contains!(&err, "'Timestamp(Nanosecond, None) - Timestamp(Nanosecond, None)' is an unsupported operation. addition/subtraction on dates/timestamps only supported with interval types"); + assert_contains!(&err, "The timestamps have different types"); let err = coerce_types(&DataType::Date32, &Operator::Plus, &DataType::Date64) .unwrap_err() .to_string(); - assert_contains!(&err, "'Date32 + Date64' is an unsupported operation. addition/subtraction on dates/timestamps only supported with interval types"); + assert_contains!(&err, "Date32 Date64 is an unsupported operation. addition/subtraction on dates/timestamps only supported with interval types"); Ok(()) } diff --git a/datafusion/optimizer/src/type_coercion.rs b/datafusion/optimizer/src/type_coercion.rs index c132d11aa7f8..0be9c89b6ccb 100644 --- a/datafusion/optimizer/src/type_coercion.rs +++ b/datafusion/optimizer/src/type_coercion.rs @@ -945,7 +945,7 @@ mod test { let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); let err = assert_optimized_plan_eq(&plan, ""); assert!(err.is_err()); - assert!(err.unwrap_err().to_string().contains("'Int64 IS DISTINCT FROM Boolean' can't be evaluated because there isn't a common type to coerce the types to")); + assert!(err.unwrap_err().to_string().contains("Int64 IS DISTINCT FROM Boolean can't be evaluated because there isn't a common type to coerce the types to")); // is not true let expr = col("a").is_not_true(); @@ -1047,7 +1047,7 @@ mod test { let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); let err = assert_optimized_plan_eq(&plan, expected); assert!(err.is_err()); - assert!(err.unwrap_err().to_string().contains("'Utf8 IS NOT DISTINCT FROM Boolean' can't be evaluated because there isn't a common type to coerce the types to")); + assert!(err.unwrap_err().to_string().contains("Utf8 IS NOT DISTINCT FROM Boolean can't be evaluated because there isn't a common type to coerce the types to")); // is not unknown let expr = col("a").is_not_unknown(); diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index c99b07077b96..22a265370149 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -44,6 +44,7 @@ unicode_expressions = ["unicode-segmentation"] [dependencies] ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } arrow = { workspace = true } +arrow-array = { version = "34.0.0", default-features = false, features = ["chrono-tz"] } arrow-buffer = { workspace = true } arrow-schema = { workspace = true } blake2 = { version = "^0.10.2", optional = true } diff --git a/datafusion/physical-expr/src/expressions/datetime.rs b/datafusion/physical-expr/src/expressions/datetime.rs index 655cb07f03b1..518a28268765 100644 --- a/datafusion/physical-expr/src/expressions/datetime.rs +++ b/datafusion/physical-expr/src/expressions/datetime.rs @@ -17,24 +17,22 @@ use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; -use arrow::array::{Array, ArrayRef}; -use arrow::compute::unary; +use arrow::array::{Array, ArrayRef, PrimitiveArray}; +use arrow::compute::{binary, unary}; use arrow::datatypes::{ - DataType, Date32Type, Date64Type, Schema, TimeUnit, TimestampMicrosecondType, - TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, + ArrowNativeTypeOp, DataType, Date32Type, Date64Type, IntervalDayTimeType, + IntervalMonthDayNanoType, IntervalYearMonthType, Schema, TimeUnit, + TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, + TimestampSecondType, }; use arrow::record_batch::RecordBatch; -use datafusion_common::cast::{ - as_date32_array, as_date64_array, as_timestamp_microsecond_array, - as_timestamp_millisecond_array, as_timestamp_nanosecond_array, - as_timestamp_second_array, -}; -use datafusion_common::scalar::{ - date32_add, date64_add, microseconds_add, milliseconds_add, nanoseconds_add, - seconds_add, -}; +use arrow_schema::IntervalUnit; +use chrono::NaiveDateTime; +use datafusion_common::cast::*; +use datafusion_common::scalar::*; use datafusion_common::Result; use datafusion_common::{DataFusionError, ScalarValue}; +use datafusion_expr::type_coercion::binary::coerce_types; use datafusion_expr::{ColumnarValue, Operator}; use std::any::Any; use std::fmt::{Display, Formatter}; @@ -59,27 +57,29 @@ impl DateTimeIntervalExpr { rhs: Arc, input_schema: &Schema, ) -> Result { - match lhs.data_type(input_schema)? { - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) => { - match rhs.data_type(input_schema)? { - DataType::Interval(_) => match &op { - Operator::Plus | Operator::Minus => Ok(Self { - lhs, - op, - rhs, - input_schema: input_schema.clone(), - }), - _ => Err(DataFusionError::Execution(format!( - "Invalid operator '{op}' for DateIntervalExpr" - ))), - }, - other => Err(DataFusionError::Execution(format!( - "Operation '{op}' not support for type {other}" - ))), - } - } - other => Err(DataFusionError::Execution(format!( - "Invalid lhs type '{other}' for DateIntervalExpr" + match ( + lhs.data_type(input_schema)?, + op, + rhs.data_type(input_schema)?, + ) { + ( + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _), + Operator::Plus | Operator::Minus, + DataType::Interval(_), + ) + | (DataType::Timestamp(_, _), Operator::Minus, DataType::Timestamp(_, _)) + | ( + DataType::Interval(_), + Operator::Plus | Operator::Minus, + DataType::Interval(_), + ) => Ok(Self { + lhs, + op, + rhs, + input_schema: input_schema.clone(), + }), + (lhs, _, rhs) => Err(DataFusionError::Execution(format!( + "Invalid operation between '{lhs}' and '{rhs}' for DateIntervalExpr" ))), } } @@ -112,7 +112,11 @@ impl PhysicalExpr for DateTimeIntervalExpr { } fn data_type(&self, input_schema: &Schema) -> Result { - self.lhs.data_type(input_schema) + coerce_types( + &self.lhs.data_type(input_schema)?, + &Operator::Minus, + &self.rhs.data_type(input_schema)?, + ) } fn nullable(&self, input_schema: &Schema) -> Result { @@ -120,18 +124,8 @@ impl PhysicalExpr for DateTimeIntervalExpr { } fn evaluate(&self, batch: &RecordBatch) -> Result { - let dates = self.lhs.evaluate(batch)?; - let intervals = self.rhs.evaluate(batch)?; - - // Unwrap interval to add - let intervals = match &intervals { - ColumnarValue::Scalar(interval) => interval, - _ => { - let msg = "Columnar execution is not yet supported for DateIntervalExpr"; - return Err(DataFusionError::Execution(msg.to_string())); - } - }; - + let lhs_value = self.lhs.evaluate(batch)?; + let rhs_value = self.rhs.evaluate(batch)?; // Invert sign for subtraction let sign = match self.op { Operator::Plus => 1, @@ -142,14 +136,30 @@ impl PhysicalExpr for DateTimeIntervalExpr { return Err(DataFusionError::Internal(msg.to_string())); } }; + // RHS is first checked. If it is a Scalar, there are 2 options: + // Either LHS is also a Scalar and matching operation is applied, + // or LHS is an Array and unary operations for related types are + // applied in evaluate_array function. If RHS is an Array, then + // LHS must also be, moreover; they must be the same Timestamp type. + match (lhs_value, rhs_value) { + (ColumnarValue::Scalar(operand_lhs), ColumnarValue::Scalar(operand_rhs)) => { + Ok(ColumnarValue::Scalar(if sign > 0 { + operand_lhs.add(&operand_rhs)? + } else { + operand_lhs.sub(&operand_rhs)? + })) + } + (ColumnarValue::Array(array_lhs), ColumnarValue::Scalar(operand_rhs)) => { + evaluate_array(array_lhs, sign, &operand_rhs) + } - match dates { - ColumnarValue::Scalar(operand) => Ok(ColumnarValue::Scalar(if sign > 0 { - operand.add(intervals)? - } else { - operand.sub(intervals)? - })), - ColumnarValue::Array(array) => evaluate_array(array, sign, intervals), + (ColumnarValue::Array(array_lhs), ColumnarValue::Array(array_rhs)) => { + evaluate_temporal_arrays(&array_lhs, sign, &array_rhs) + } + (_, _) => { + let msg = "If RHS of the operation is an array, then LHS also must be"; + Err(DataFusionError::Internal(msg.to_string())) + } } } @@ -239,6 +249,487 @@ pub fn evaluate_array( Ok(ColumnarValue::Array(ret)) } +macro_rules! ts_sub_op { + ($lhs:ident, $rhs:ident, $lhs_tz:ident, $rhs_tz:ident, $coef:expr, $caster:expr, $op:expr, $ts_unit:expr, $mode:expr, $type_out:ty) => {{ + let prim_array_lhs = $caster(&$lhs)?; + let prim_array_rhs = $caster(&$rhs)?; + let ret: PrimitiveArray<$type_out> = + arrow::compute::try_binary(prim_array_lhs, prim_array_rhs, |ts1, ts2| { + let (parsed_lhs_tz, parsed_rhs_tz) = + (parse_timezones($lhs_tz)?, parse_timezones($rhs_tz)?); + let (naive_lhs, naive_rhs) = calculate_naives::<$mode>( + ts1.mul_wrapping($coef), + parsed_lhs_tz, + ts2.mul_wrapping($coef), + parsed_rhs_tz, + )?; + Ok($op($ts_unit(&naive_lhs), $ts_unit(&naive_rhs))) + })?; + Arc::new(ret) as ArrayRef + }}; +} +macro_rules! interval_op { + ($lhs:ident, $rhs:ident, $caster:expr, $op:expr, $sign:ident, $type_in:ty) => {{ + let prim_array_lhs = $caster(&$lhs)?; + let prim_array_rhs = $caster(&$rhs)?; + let ret = Arc::new(binary::<$type_in, $type_in, _, $type_in>( + prim_array_lhs, + prim_array_rhs, + |interval1, interval2| $op(interval1, interval2, $sign), + )?) as ArrayRef; + ret + }}; +} +macro_rules! interval_cross_op { + ($lhs:ident, $rhs:ident, $caster1:expr, $caster2:expr, $op:expr, $sign:ident, $commute:ident, $type_in1:ty, $type_in2:ty) => {{ + let prim_array_lhs = $caster1(&$lhs)?; + let prim_array_rhs = $caster2(&$rhs)?; + let ret = Arc::new(binary::<$type_in1, $type_in2, _, IntervalMonthDayNanoType>( + prim_array_lhs, + prim_array_rhs, + |interval1, interval2| $op(interval1, interval2, $sign, $commute), + )?) as ArrayRef; + ret + }}; +} +macro_rules! ts_interval_op { + ($lhs:ident, $rhs:ident, $tz:ident, $caster1:expr, $caster2:expr, $op:expr, $sign:ident, $type_in1:ty, $type_in2:ty) => {{ + let prim_array_lhs = $caster1(&$lhs)?; + let prim_array_rhs = $caster2(&$rhs)?; + let ret: PrimitiveArray<$type_in1> = arrow::compute::try_binary( + prim_array_lhs, + prim_array_rhs, + |ts, interval| Ok($op(ts, interval as i128, $sign)?), + )?; + Arc::new(ret.with_timezone_opt($tz.clone())) as ArrayRef + }}; +} +// This function evaluates temporal array operations, such as timestamp - timestamp, interval + interval, +// timestamp + interval, and interval + timestamp. It takes two arrays as input and an integer sign representing +// the operation (+1 for addition and -1 for subtraction). It returns a ColumnarValue as output, which can hold +// either a scalar or an array. +pub fn evaluate_temporal_arrays( + array_lhs: &ArrayRef, + sign: i32, + array_rhs: &ArrayRef, +) -> Result { + let ret = match (array_lhs.data_type(), array_rhs.data_type()) { + // Timestamp - Timestamp operations, operands of only the same types are supported. + (DataType::Timestamp(_, _), DataType::Timestamp(_, _)) => { + ts_array_op(array_lhs, array_rhs)? + } + // Interval (+ , -) Interval operations + (DataType::Interval(_), DataType::Interval(_)) => { + interval_array_op(array_lhs, array_rhs, sign)? + } + // Timestamp (+ , -) Interval and Interval + Timestamp operations + // Interval - Timestamp operation is not rational hence not supported + (DataType::Timestamp(_, _), DataType::Interval(_)) => { + ts_interval_array_op(array_lhs, sign, array_rhs)? + } + (DataType::Interval(_), DataType::Timestamp(_, _)) if sign == 1 => { + ts_interval_array_op(array_rhs, sign, array_lhs)? + } + (_, _) => Err(DataFusionError::Execution(format!( + "Invalid array types for DateIntervalExpr: {:?} {} {:?}", + array_lhs.data_type(), + sign, + array_rhs.data_type() + )))?, + }; + Ok(ColumnarValue::Array(ret)) +} + +/// Performs a timestamp subtraction operation on two arrays and returns the resulting array. +fn ts_array_op(array_lhs: &ArrayRef, array_rhs: &ArrayRef) -> Result { + match (array_lhs.data_type(), array_rhs.data_type()) { + ( + DataType::Timestamp(TimeUnit::Second, opt_tz_lhs), + DataType::Timestamp(TimeUnit::Second, opt_tz_rhs), + ) => Ok(ts_sub_op!( + array_lhs, + array_rhs, + opt_tz_lhs, + opt_tz_rhs, + 1000i64, + as_timestamp_second_array, + seconds_sub, + NaiveDateTime::timestamp, + MILLISECOND_MODE, + IntervalDayTimeType + )), + ( + DataType::Timestamp(TimeUnit::Millisecond, opt_tz_lhs), + DataType::Timestamp(TimeUnit::Millisecond, opt_tz_rhs), + ) => Ok(ts_sub_op!( + array_lhs, + array_rhs, + opt_tz_lhs, + opt_tz_rhs, + 1i64, + as_timestamp_millisecond_array, + milliseconds_sub, + NaiveDateTime::timestamp_millis, + MILLISECOND_MODE, + IntervalDayTimeType + )), + ( + DataType::Timestamp(TimeUnit::Microsecond, opt_tz_lhs), + DataType::Timestamp(TimeUnit::Microsecond, opt_tz_rhs), + ) => Ok(ts_sub_op!( + array_lhs, + array_rhs, + opt_tz_lhs, + opt_tz_rhs, + 1000i64, + as_timestamp_microsecond_array, + microseconds_sub, + NaiveDateTime::timestamp_micros, + NANOSECOND_MODE, + IntervalMonthDayNanoType + )), + ( + DataType::Timestamp(TimeUnit::Nanosecond, opt_tz_lhs), + DataType::Timestamp(TimeUnit::Nanosecond, opt_tz_rhs), + ) => Ok(ts_sub_op!( + array_lhs, + array_rhs, + opt_tz_lhs, + opt_tz_rhs, + 1i64, + as_timestamp_nanosecond_array, + nanoseconds_sub, + NaiveDateTime::timestamp_nanos, + NANOSECOND_MODE, + IntervalMonthDayNanoType + )), + (_, _) => Err(DataFusionError::Execution(format!( + "Invalid array types for Timestamp subtraction: {:?} - {:?}", + array_lhs.data_type(), + array_rhs.data_type() + ))), + } +} +/// Performs an interval operation on two arrays and returns the resulting array. +/// The operation sign determines whether to perform addition or subtraction. +/// The data type and unit of the two input arrays must match the supported combinations. +fn interval_array_op( + array_lhs: &ArrayRef, + array_rhs: &ArrayRef, + sign: i32, +) -> Result { + match (array_lhs.data_type(), array_rhs.data_type()) { + ( + DataType::Interval(IntervalUnit::YearMonth), + DataType::Interval(IntervalUnit::YearMonth), + ) => Ok(interval_op!( + array_lhs, + array_rhs, + as_interval_ym_array, + op_ym, + sign, + IntervalYearMonthType + )), + ( + DataType::Interval(IntervalUnit::YearMonth), + DataType::Interval(IntervalUnit::DayTime), + ) => Ok(interval_cross_op!( + array_lhs, + array_rhs, + as_interval_ym_array, + as_interval_dt_array, + op_ym_dt, + sign, + false, + IntervalYearMonthType, + IntervalDayTimeType + )), + ( + DataType::Interval(IntervalUnit::YearMonth), + DataType::Interval(IntervalUnit::MonthDayNano), + ) => Ok(interval_cross_op!( + array_lhs, + array_rhs, + as_interval_ym_array, + as_interval_mdn_array, + op_ym_mdn, + sign, + false, + IntervalYearMonthType, + IntervalMonthDayNanoType + )), + ( + DataType::Interval(IntervalUnit::DayTime), + DataType::Interval(IntervalUnit::YearMonth), + ) => Ok(interval_cross_op!( + array_rhs, + array_lhs, + as_interval_ym_array, + as_interval_dt_array, + op_ym_dt, + sign, + true, + IntervalYearMonthType, + IntervalDayTimeType + )), + ( + DataType::Interval(IntervalUnit::DayTime), + DataType::Interval(IntervalUnit::DayTime), + ) => Ok(interval_op!( + array_lhs, + array_rhs, + as_interval_dt_array, + op_dt, + sign, + IntervalDayTimeType + )), + ( + DataType::Interval(IntervalUnit::DayTime), + DataType::Interval(IntervalUnit::MonthDayNano), + ) => Ok(interval_cross_op!( + array_lhs, + array_rhs, + as_interval_dt_array, + as_interval_mdn_array, + op_dt_mdn, + sign, + false, + IntervalDayTimeType, + IntervalMonthDayNanoType + )), + ( + DataType::Interval(IntervalUnit::MonthDayNano), + DataType::Interval(IntervalUnit::YearMonth), + ) => Ok(interval_cross_op!( + array_rhs, + array_lhs, + as_interval_ym_array, + as_interval_mdn_array, + op_ym_mdn, + sign, + true, + IntervalYearMonthType, + IntervalMonthDayNanoType + )), + ( + DataType::Interval(IntervalUnit::MonthDayNano), + DataType::Interval(IntervalUnit::DayTime), + ) => Ok(interval_cross_op!( + array_rhs, + array_lhs, + as_interval_dt_array, + as_interval_mdn_array, + op_dt_mdn, + sign, + true, + IntervalDayTimeType, + IntervalMonthDayNanoType + )), + ( + DataType::Interval(IntervalUnit::MonthDayNano), + DataType::Interval(IntervalUnit::MonthDayNano), + ) => Ok(interval_op!( + array_lhs, + array_rhs, + as_interval_mdn_array, + op_mdn, + sign, + IntervalMonthDayNanoType + )), + (_, _) => Err(DataFusionError::Execution(format!( + "Invalid array types for Interval operation: {:?} {} {:?}", + array_lhs.data_type(), + sign, + array_rhs.data_type() + ))), + } +} +/// Performs a timestamp/interval operation on two arrays and returns the resulting array. +/// The operation sign determines whether to perform addition or subtraction. +/// The data type and unit of the two input arrays must match the supported combinations. +fn ts_interval_array_op( + array_lhs: &ArrayRef, + sign: i32, + array_rhs: &ArrayRef, +) -> Result { + match (array_lhs.data_type(), array_rhs.data_type()) { + ( + DataType::Timestamp(TimeUnit::Second, tz), + DataType::Interval(IntervalUnit::YearMonth), + ) => Ok(ts_interval_op!( + array_lhs, + array_rhs, + tz, + as_timestamp_second_array, + as_interval_ym_array, + seconds_add_array::, + sign, + TimestampSecondType, + IntervalYearMonthType + )), + ( + DataType::Timestamp(TimeUnit::Second, tz), + DataType::Interval(IntervalUnit::DayTime), + ) => Ok(ts_interval_op!( + array_lhs, + array_rhs, + tz, + as_timestamp_second_array, + as_interval_dt_array, + seconds_add_array::, + sign, + TimestampSecondType, + IntervalDayTimeType + )), + ( + DataType::Timestamp(TimeUnit::Second, tz), + DataType::Interval(IntervalUnit::MonthDayNano), + ) => Ok(ts_interval_op!( + array_lhs, + array_rhs, + tz, + as_timestamp_second_array, + as_interval_mdn_array, + seconds_add_array::, + sign, + TimestampSecondType, + IntervalMonthDayNanoType + )), + ( + DataType::Timestamp(TimeUnit::Millisecond, tz), + DataType::Interval(IntervalUnit::YearMonth), + ) => Ok(ts_interval_op!( + array_lhs, + array_rhs, + tz, + as_timestamp_millisecond_array, + as_interval_ym_array, + milliseconds_add_array::, + sign, + TimestampMillisecondType, + IntervalYearMonthType + )), + ( + DataType::Timestamp(TimeUnit::Millisecond, tz), + DataType::Interval(IntervalUnit::DayTime), + ) => Ok(ts_interval_op!( + array_lhs, + array_rhs, + tz, + as_timestamp_millisecond_array, + as_interval_dt_array, + milliseconds_add_array::, + sign, + TimestampMillisecondType, + IntervalDayTimeType + )), + ( + DataType::Timestamp(TimeUnit::Millisecond, tz), + DataType::Interval(IntervalUnit::MonthDayNano), + ) => Ok(ts_interval_op!( + array_lhs, + array_rhs, + tz, + as_timestamp_millisecond_array, + as_interval_mdn_array, + milliseconds_add_array::, + sign, + TimestampMillisecondType, + IntervalMonthDayNanoType + )), + ( + DataType::Timestamp(TimeUnit::Microsecond, tz), + DataType::Interval(IntervalUnit::YearMonth), + ) => Ok(ts_interval_op!( + array_lhs, + array_rhs, + tz, + as_timestamp_microsecond_array, + as_interval_ym_array, + microseconds_add_array::, + sign, + TimestampMicrosecondType, + IntervalYearMonthType + )), + ( + DataType::Timestamp(TimeUnit::Microsecond, tz), + DataType::Interval(IntervalUnit::DayTime), + ) => Ok(ts_interval_op!( + array_lhs, + array_rhs, + tz, + as_timestamp_microsecond_array, + as_interval_dt_array, + microseconds_add_array::, + sign, + TimestampMicrosecondType, + IntervalDayTimeType + )), + ( + DataType::Timestamp(TimeUnit::Microsecond, tz), + DataType::Interval(IntervalUnit::MonthDayNano), + ) => Ok(ts_interval_op!( + array_lhs, + array_rhs, + tz, + as_timestamp_microsecond_array, + as_interval_mdn_array, + microseconds_add_array::, + sign, + TimestampMicrosecondType, + IntervalMonthDayNanoType + )), + ( + DataType::Timestamp(TimeUnit::Nanosecond, tz), + DataType::Interval(IntervalUnit::YearMonth), + ) => Ok(ts_interval_op!( + array_lhs, + array_rhs, + tz, + as_timestamp_nanosecond_array, + as_interval_ym_array, + nanoseconds_add_array::, + sign, + TimestampNanosecondType, + IntervalYearMonthType + )), + ( + DataType::Timestamp(TimeUnit::Nanosecond, tz), + DataType::Interval(IntervalUnit::DayTime), + ) => Ok(ts_interval_op!( + array_lhs, + array_rhs, + tz, + as_timestamp_nanosecond_array, + as_interval_dt_array, + nanoseconds_add_array::, + sign, + TimestampNanosecondType, + IntervalDayTimeType + )), + ( + DataType::Timestamp(TimeUnit::Nanosecond, tz), + DataType::Interval(IntervalUnit::MonthDayNano), + ) => Ok(ts_interval_op!( + array_lhs, + array_rhs, + tz, + as_timestamp_nanosecond_array, + as_interval_mdn_array, + nanoseconds_add_array::, + sign, + TimestampNanosecondType, + IntervalMonthDayNanoType + )), + (_, _) => Err(DataFusionError::Execution(format!( + "Invalid array types for Timestamp Interval operation: {:?} {} {:?}", + array_lhs.data_type(), + sign, + array_rhs.data_type() + ))), + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index 1fbbced61639..0266ecfd2e51 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -170,6 +170,7 @@ pub fn create_physical_expr( ) } Expr::BinaryExpr(BinaryExpr { left, op, right }) => { + // Create physical expressions for left and right operands let lhs = create_physical_expr( left, input_dfschema, @@ -182,6 +183,9 @@ pub fn create_physical_expr( input_schema, execution_props, )?; + // Match the data types and operator to determine the appropriate expression, if + // they are supported temporal types and operations, create DateTimeIntervalExpr, + // else create BinaryExpr. match ( lhs.data_type(input_schema)?, op, @@ -207,6 +211,26 @@ pub fn create_physical_expr( lhs, input_schema, )?)), + ( + DataType::Timestamp(_, _), + Operator::Minus, + DataType::Timestamp(_, _), + ) => Ok(Arc::new(DateTimeIntervalExpr::try_new( + lhs, + *op, + rhs, + input_schema, + )?)), + ( + DataType::Interval(_), + Operator::Plus | Operator::Minus, + DataType::Interval(_), + ) => Ok(Arc::new(DateTimeIntervalExpr::try_new( + lhs, + *op, + rhs, + input_schema, + )?)), _ => { // Note that the logical planner is responsible // for type coercion on the arguments (e.g. if one