diff --git a/CHANGELOG.md b/CHANGELOG.md index 25c4d2a7d30..fa757ea2108 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,306 @@ # Change log -Generated on 2021-12-07 +Generated on 2022-02-07 + +## Release 22.02 + +### Features +||| +|:---|:---| +|[#4305](https://github.com/NVIDIA/spark-rapids/issues/4305)|[FEA] write nvidia tool wrappers to allow old YARN versions to work with MIG| +|[#4360](https://github.com/NVIDIA/spark-rapids/issues/4360)|[FEA] Add explain api for Spark 2.X| +|[#3541](https://github.com/NVIDIA/spark-rapids/issues/3541)|[FEA] Support max on single-level struct in aggregation context| +|[#4238](https://github.com/NVIDIA/spark-rapids/issues/4238)|[FEA] Add a Spark 3.X Explain only mode to the plugin| +|[#3952](https://github.com/NVIDIA/spark-rapids/issues/3952)|[Audit] [FEA][SPARK-32986][SQL] Add bucketed scan info in query plan of data source v1| +|[#4412](https://github.com/NVIDIA/spark-rapids/issues/4412)|[FEA] Improve support for \A, \Z, and \z in regular expressions| +|[#3979](https://github.com/NVIDIA/spark-rapids/issues/3979)|[FEA] Improvements for CPU(Row) based UDF| +|[#4467](https://github.com/NVIDIA/spark-rapids/issues/4467)|[FEA] Add support for regular expression with repeated digits (`\d+`, `\d*`, `\d?`)| +|[#4439](https://github.com/NVIDIA/spark-rapids/issues/4439)|[FEA] Enable GPU broadcast exchange reuse for DPP when AQE enabled| +|[#3512](https://github.com/NVIDIA/spark-rapids/issues/3512)|[FEA] Support org.apache.spark.sql.catalyst.expressions.Sequence| +|[#3475](https://github.com/NVIDIA/spark-rapids/issues/3475)|[FEA] Spark 3.2.0 reads Parquet unsigned int64(UINT64) as Decimal(20,0) but CUDF does not support it | +|[#4091](https://github.com/NVIDIA/spark-rapids/issues/4091)|[FEA] regexp_replace: Improve support for ^ and $| +|[#4104](https://github.com/NVIDIA/spark-rapids/issues/4104)|[FEA] Support org.apache.spark.sql.catalyst.expressions.ReplicateRows| +|[#4027](https://github.com/NVIDIA/spark-rapids/issues/4027)|[FEA] Support SubqueryBroadcast on GPU to enable exchange reuse during DPP| +|[#4284](https://github.com/NVIDIA/spark-rapids/issues/4284)|[FEA] Support idx = 0 in GpuRegExpExtract| +|[#4002](https://github.com/NVIDIA/spark-rapids/issues/4002)|[FEA] Implement regexp_extract on GPU| +|[#3221](https://github.com/NVIDIA/spark-rapids/issues/3221)|[FEA] Support GpuFirst and GpuLast on nested types under reduction aggregations| +|[#3944](https://github.com/NVIDIA/spark-rapids/issues/3944)|[FEA] Full support for sum with overflow on Decimal 128| +|[#4028](https://github.com/NVIDIA/spark-rapids/issues/4028)|[FEA] support GpuCast from non-nested ArrayType to StringType| +|[#3250](https://github.com/NVIDIA/spark-rapids/issues/3250)|[FEA] Make CreateMap duplicate key handling compatible with Spark and enable CreateMap by default| +|[#4170](https://github.com/NVIDIA/spark-rapids/issues/4170)|[FEA] Make regular expression behavior with `$` and `\r` consistent with CPU| +|[#4001](https://github.com/NVIDIA/spark-rapids/issues/4001)|[FEA] Add regexp support to regexp_replace| +|[#3962](https://github.com/NVIDIA/spark-rapids/issues/3962)|[FEA] Support null characters in regular expressions in RLIKE| +|[#3797](https://github.com/NVIDIA/spark-rapids/issues/3797)|[FEA] Make RLike support consistent with Apache Spark| + +### Performance +||| +|:---|:---| +|[#4392](https://github.com/NVIDIA/spark-rapids/issues/4392)|[FEA] could the parquet scan code avoid acquiring the semaphore for an empty batch?| 
+|[#679](https://github.com/NVIDIA/spark-rapids/issues/679)|[FEA] move some deserialization code out of the scope of the gpu-semaphore to increase cpu concurrent| +|[#4350](https://github.com/NVIDIA/spark-rapids/issues/4350)|[FEA] Optimize the all-true and all-false cases in GPU `If` and `CaseWhen` | +|[#4309](https://github.com/NVIDIA/spark-rapids/issues/4309)|[FEA] Leverage cudf conditional nested loop join to implement semi/anti hash join with condition| +|[#4395](https://github.com/NVIDIA/spark-rapids/issues/4395)|[FEA] acquire the semaphore after concatToHost in GpuShuffleCoalesceIterator| +|[#4134](https://github.com/NVIDIA/spark-rapids/issues/4134)|[FEA] Allow `EliminateJoinToEmptyRelation` in `GpuBroadcastExchangeExec` | +|[#4189](https://github.com/NVIDIA/spark-rapids/issues/4189)|[FEA] understand why between is so expensive| + +### Bugs Fixed +||| +|:---|:---| +|[#4675](https://github.com/NVIDIA/spark-rapids/issues/4675)|[BUG] Jenkins integration build timed out at 10 hours| +|[#4665](https://github.com/NVIDIA/spark-rapids/issues/4665)|[BUG] Spark321Shims.getParquetFilters failed with NoSuchMethodError| +|[#4635](https://github.com/NVIDIA/spark-rapids/issues/4635)|[BUG] nvidia-smi wrapper script ignores ENABLE_NON_MIG_GPUS=1 on a heterogeneous multi-GPU machine| +|[#4500](https://github.com/NVIDIA/spark-rapids/issues/4500)|[BUG] Build failures against Spark 3.2.1 rc1 and make 3.2.1 non snapshot| +|[#4631](https://github.com/NVIDIA/spark-rapids/issues/4631)|[BUG] Release build with mvn option `-P source-javadoc` FAILED| +|[#4625](https://github.com/NVIDIA/spark-rapids/issues/4625)|[BUG] NDS query 5 fails with AdaptiveSparkPlanExec assertion| +|[#4632](https://github.com/NVIDIA/spark-rapids/issues/4632)|[BUG] Build failing for Spark 3.3.0 due to deprecated method warnings| +|[#4599](https://github.com/NVIDIA/spark-rapids/issues/4599)|[BUG] test_group_apply_udf and test_group_apply_udf_more_types hangs on Databricks 9.1| +|[#4600](https://github.com/NVIDIA/spark-rapids/issues/4600)|[BUG] crash if we have a decimal128 in a struct in an array | +|[#4581](https://github.com/NVIDIA/spark-rapids/issues/4581)|[BUG] Build error "GpuOverrides.scala:924: wrong number of arguments" on DB9.1.x spark-3.1.2 | +|[#4593](https://github.com/NVIDIA/spark-rapids/issues/4593)|[BUG] dup GpuHashJoin.diff case-folding issue| +|[#4503](https://github.com/NVIDIA/spark-rapids/issues/4503)|[BUG] regexp_replace with back references produces incorrect results on GPU| +|[#4567](https://github.com/NVIDIA/spark-rapids/issues/4567)|[BUG] Profile tool hangs in compare mode| +|[#4315](https://github.com/NVIDIA/spark-rapids/issues/4315)|[BUG] test_hash_reduction_decimal_overflow_sum[30] failed OOM in integration tests| +|[#4551](https://github.com/NVIDIA/spark-rapids/issues/4551)|[BUG] protobuf-java version changed to 3.x| +|[#4499](https://github.com/NVIDIA/spark-rapids/issues/4499)|[BUG]GpuSequence blows up when nulls exist in any of the inputs (start, stop, step)| +|[#4454](https://github.com/NVIDIA/spark-rapids/issues/4454)|[BUG] Shade warnings when building the tools artifact| +|[#4541](https://github.com/NVIDIA/spark-rapids/issues/4541)|[BUG] Column vector leak in conditionals_test.py| +|[#4514](https://github.com/NVIDIA/spark-rapids/issues/4514)|[BUG] test_hash_reduction_pivot_without_nans failed| +|[#4521](https://github.com/NVIDIA/spark-rapids/issues/4521)|[BUG] Inconsistencies in handling of newline characters and string and line anchors| +|[#4548](https://github.com/NVIDIA/spark-rapids/issues/4548)|[BUG] 
ai.rapids.cudf.CudaException: an illegal instruction was encountered in databricks 9.1| +|[#4475](https://github.com/NVIDIA/spark-rapids/issues/4475)|[BUG] `\D` and `\W` match newline in Spark but not in cuDF| +|[#4524](https://github.com/NVIDIA/spark-rapids/issues/4524)|[BUG] RegExp transpiler fails to detect some choice expressions that cuDF cannot compile| +|[#3226](https://github.com/NVIDIA/spark-rapids/issues/3226)|[BUG]OOM happened when do cube operations| +|[#2504](https://github.com/NVIDIA/spark-rapids/issues/2504)|[BUG] OOM when running NDS queries with UCX and GDS| +|[#4273](https://github.com/NVIDIA/spark-rapids/issues/4273)|[BUG] Rounding past the size that can be stored in a type produces incorrect results| +|[#4060](https://github.com/NVIDIA/spark-rapids/issues/4060)|[BUG] test_hash_groupby_approx_percentile_long_repeated_keys failed intermittently| +|[#4039](https://github.com/NVIDIA/spark-rapids/issues/4039)|[BUG] Spark 3.3.0 IT Array test failures| +|[#3849](https://github.com/NVIDIA/spark-rapids/issues/3849)|[BUG] In ANSI mode we can fail in cases Spark would not due to conditionals| +|[#4421](https://github.com/NVIDIA/spark-rapids/issues/4421)|[BUG] the driver is trying to load CUDA with latest 22.02 | +|[#4455](https://github.com/NVIDIA/spark-rapids/issues/4455)|[BUG] join_test.py::test_struct_self_join[IGNORE_ORDER({'local': True})] failed in spark330| +|[#4442](https://github.com/NVIDIA/spark-rapids/issues/4442)|[BUG] mvn build FAILED with option `-P noSnapshotsWithDatabricks`| +|[#4281](https://github.com/NVIDIA/spark-rapids/issues/4281)|[BUG] q9 regression between 21.10 and 21.12| +|[#4280](https://github.com/NVIDIA/spark-rapids/issues/4280)|[BUG] q88 regression between 21.10 and 21.12| +|[#4422](https://github.com/NVIDIA/spark-rapids/issues/4422)|[BUG] Host column vectors are being leaked during tests| +|[#4446](https://github.com/NVIDIA/spark-rapids/issues/4446)|[BUG] GpuCast crashes when casting from Array with unsupportable child type| +|[#4432](https://github.com/NVIDIA/spark-rapids/issues/4432)|[BUG] nightly build 3.3.0 failed: HashClusteredDistribution is not a member of org.apache.spark.sql.catalyst.plans.physical| +|[#4443](https://github.com/NVIDIA/spark-rapids/issues/4443)|[BUG] SPARK-37705 breaks parquet filters from Spark 3.3.0 and Spark 3.2.2 onwards| +|[#4316](https://github.com/NVIDIA/spark-rapids/issues/4316)|[BUG] Exception: Unable to find py4j, your SPARK_HOME may not be configured correctly intermittently| +|[#4378](https://github.com/NVIDIA/spark-rapids/issues/4378)|[BUG] udf_test udf_cudf_test failed require_minimum_pandas_version check in spark 320+| +|[#4423](https://github.com/NVIDIA/spark-rapids/issues/4423)|[BUG] Build is failing due to FileScanRDD changes in Spark 3.3.0-SNAPSHOT| +|[#4401](https://github.com/NVIDIA/spark-rapids/issues/4401)|[BUG]array_test.py::test_array_contains failures| +|[#4403](https://github.com/NVIDIA/spark-rapids/issues/4403)|[BUG] NDS query 72 logs codegen fallback exception and produces incorrect results| +|[#4386](https://github.com/NVIDIA/spark-rapids/issues/4386)|[BUG] conditionals_test.py FAILED with side_effects_cast[Integer/Long] on Databricks 9.1 Runtime| +|[#3934](https://github.com/NVIDIA/spark-rapids/issues/3934)|[BUG] Dependencies of published integration tests jar are missing| +|[#4341](https://github.com/NVIDIA/spark-rapids/issues/4341)|[BUG] GpuCast.scala:nnn warning: discarding unmoored doc comment| +|[#4356](https://github.com/NVIDIA/spark-rapids/issues/4356)|[BUG] nightly spark303 deploy pulling 
spark301 aggregator| +|[#4347](https://github.com/NVIDIA/spark-rapids/issues/4347)|[BUG] Dist jar pom lists aggregator jar as dependency| +|[#4176](https://github.com/NVIDIA/spark-rapids/issues/4176)|[BUG] ParseDateTimeSuite UT failed| +|[#4292](https://github.com/NVIDIA/spark-rapids/issues/4292)|[BUG] no meaningful message is surfaced to maven when binary-dedupe fails| +|[#4351](https://github.com/NVIDIA/spark-rapids/issues/4351)|[BUG] Tests FAILED On SPARK-3.2.0, com.nvidia.spark.rapids.SerializedTableColumn cannot be cast to com.nvidia.spark.rapids.GpuColumnVector| +|[#4346](https://github.com/NVIDIA/spark-rapids/issues/4346)|[BUG] q73 decimal was twice as slow in weekly results| +|[#4334](https://github.com/NVIDIA/spark-rapids/issues/4334)|[BUG] GpuColumnarToRowExec will always be tagged False for exportColumnarRdd after Spark311 | +|[#4339](https://github.com/NVIDIA/spark-rapids/issues/4339)|The parameter `dataType` is not necessary in `resolveColumnVector` method.| +|[#4275](https://github.com/NVIDIA/spark-rapids/issues/4275)|[BUG] Row-based Hive UDF will fail if arguments contain a foldable expression.| +|[#4229](https://github.com/NVIDIA/spark-rapids/issues/4229)|[BUG] regexp_replace `[^a]` has different behavior between CPU and GPU for multiline strings| +|[#4294](https://github.com/NVIDIA/spark-rapids/issues/4294)|[BUG] parquet_write_test.py::test_ts_write_fails_datetime_exception failed in spark 3.1.1 and 3.1.2| +|[#4205](https://github.com/NVIDIA/spark-rapids/issues/4205)|[BUG] Get different results when casting from timestamp to string| +|[#4277](https://github.com/NVIDIA/spark-rapids/issues/4277)|[BUG] cudf_udf nightly cudf import rmm failed| +|[#4246](https://github.com/NVIDIA/spark-rapids/issues/4246)|[BUG] Regression in CastOpSuite due to cuDF change in parsing NaN| +|[#4243](https://github.com/NVIDIA/spark-rapids/issues/4243)|[BUG] test_regexp_replace_null_pattern_fallback[ALLOW_NON_GPU(ProjectExec,RegExpReplace)] failed in databricks| +|[#4244](https://github.com/NVIDIA/spark-rapids/issues/4244)|[BUG] Cast from string to float using hand-picked values failed| +|[#4227](https://github.com/NVIDIA/spark-rapids/issues/4227)|[BUG] RAPIDS Shuffle Manager doesn't fallback given encryption settings| +|[#3374](https://github.com/NVIDIA/spark-rapids/issues/3374)|[BUG] minor deprecation warnings in a 3.2 shim build| +|[#3613](https://github.com/NVIDIA/spark-rapids/issues/3613)|[BUG] release312db profile pulls in 311until320-apache| +|[#4213](https://github.com/NVIDIA/spark-rapids/issues/4213)|[BUG] unused method with a misleading outdated comment in ShimLoader | +|[#3609](https://github.com/NVIDIA/spark-rapids/issues/3609)|[BUG] GpuShuffleExchangeExec in v2 shims has inconsistent packaging| +|[#4127](https://github.com/NVIDIA/spark-rapids/issues/4127)|[BUG] CUDF 22.02 nightly test failure| + +### PRs +||| +|:---|:---| +|[#4700](https://github.com/NVIDIA/spark-rapids/pull/4700)|Update cudfjni version to released 22.02.0| +|[#4701](https://github.com/NVIDIA/spark-rapids/pull/4701)|Decrease nightly tests upper limitation to 7 [skip ci]| +|[#4639](https://github.com/NVIDIA/spark-rapids/pull/4639)|Update changelog for 22.02 and archive info of some older releases [skip ci]| +|[#4572](https://github.com/NVIDIA/spark-rapids/pull/4572)|Add download page for 22.02 [skip ci]| +|[#4672](https://github.com/NVIDIA/spark-rapids/pull/4672)|Revert "Disable 311cdh build due to missing dependency (#4659)"| +|[#4662](https://github.com/NVIDIA/spark-rapids/pull/4662)|Update the deploy script [skip 
ci]| +|[#4657](https://github.com/NVIDIA/spark-rapids/pull/4657)|Upmerge spark2 directory to the latest 22.02 changes| +|[#4659](https://github.com/NVIDIA/spark-rapids/pull/4659)|Disable 311cdh build by default because of a missing dependency| +|[#4508](https://github.com/NVIDIA/spark-rapids/pull/4508)|Fix Spark 3.2.1 build failures and make it non-snapshot| +|[#4652](https://github.com/NVIDIA/spark-rapids/pull/4652)|Remove non-deterministic test order in nightly [skip ci]| +|[#4643](https://github.com/NVIDIA/spark-rapids/pull/4643)|Add profile release301 when mvn help:evaluate| +|[#4630](https://github.com/NVIDIA/spark-rapids/pull/4630)|Fix the incomplete capture of SubqueryBroadcast | +|[#4633](https://github.com/NVIDIA/spark-rapids/pull/4633)|Suppress newTaskTempFile method warnings for Spark 3.3.0 build| +|[#4618](https://github.com/NVIDIA/spark-rapids/pull/4618)|[DB31x] Pick the correct Python runner for flatmap-group Pandas UDF| +|[#4622](https://github.com/NVIDIA/spark-rapids/pull/4622)|Fallback to CPU when encoding is not supported for JSON reader| +|[#4470](https://github.com/NVIDIA/spark-rapids/pull/4470)|Add in HashPartitioning support for decimal 128| +|[#4535](https://github.com/NVIDIA/spark-rapids/pull/4535)|Revert "Disable orc write by default because of https://issues.apache.org/jira/browse/ORC-1075 (#4471)"| +|[#4583](https://github.com/NVIDIA/spark-rapids/pull/4583)|Avoid unapply on PromotePrecision| +|[#4573](https://github.com/NVIDIA/spark-rapids/pull/4573)|Correct version from 21.12 to 22.02[skip ci]| +|[#4575](https://github.com/NVIDIA/spark-rapids/pull/4575)|Correct and update links in UDF doc[skip ci]| +|[#4501](https://github.com/NVIDIA/spark-rapids/pull/4501)|Switch and/or to use new cudf binops to improve performance| +|[#4594](https://github.com/NVIDIA/spark-rapids/pull/4594)|Resolve case-folding issue [skip ci]| +|[#4585](https://github.com/NVIDIA/spark-rapids/pull/4585)|Spark2 module upmerge, deploy script, and updates for Jenkins| +|[#4589](https://github.com/NVIDIA/spark-rapids/pull/4589)|Increase premerge databricks IDLE_TIMEOUT to 4 hours [skip ci]| +|[#4485](https://github.com/NVIDIA/spark-rapids/pull/4485)|Add json reader support| +|[#4556](https://github.com/NVIDIA/spark-rapids/pull/4556)|regexp_replace with back-references should fall back to CPU| +|[#4569](https://github.com/NVIDIA/spark-rapids/pull/4569)|Fix infinite loop with Profiling tool compare mode and app with no sql ids| +|[#4529](https://github.com/NVIDIA/spark-rapids/pull/4529)|Add support for Spark 2.x Explain Api| +|[#4577](https://github.com/NVIDIA/spark-rapids/pull/4577)|Revert "Fix CVE-2021-22569 (#4545)"| +|[#4520](https://github.com/NVIDIA/spark-rapids/pull/4520)|GpuSequence refactor| +|[#4570](https://github.com/NVIDIA/spark-rapids/pull/4570)|A few quick fixes to try to reduce max memory usage in the tests| +|[#4477](https://github.com/NVIDIA/spark-rapids/pull/4477)|Use libcudf mixed joins for conditional hash joins| +|[#4566](https://github.com/NVIDIA/spark-rapids/pull/4566)|remove scala-library from combined tools jar| +|[#4552](https://github.com/NVIDIA/spark-rapids/pull/4552)|Fix resource leak in GpuCaseWhen| +|[#4553](https://github.com/NVIDIA/spark-rapids/pull/4553)|Reenable test_hash_reduction_pivot_without_nans| +|[#4530](https://github.com/NVIDIA/spark-rapids/pull/4530)|Fix correctness issues in regexp and add `\r` and `\n` to fuzz tests| +|[#4549](https://github.com/NVIDIA/spark-rapids/pull/4549)|Fix typos in integration tests README [skip ci]| 
+|[#4545](https://github.com/NVIDIA/spark-rapids/pull/4545)|Fix CVE-2021-22569| +|[#4543](https://github.com/NVIDIA/spark-rapids/pull/4543)|Enable auto-merge from branch-22.02 to branch-22.04 [skip ci]| +|[#4540](https://github.com/NVIDIA/spark-rapids/pull/4540)|Remove user kuhushukla| +|[#4434](https://github.com/NVIDIA/spark-rapids/pull/4434)|Support max on single-level struct in aggregation context| +|[#4534](https://github.com/NVIDIA/spark-rapids/pull/4534)|Temporarily disable integration test - test_hash_reduction_pivot_without_nans| +|[#4322](https://github.com/NVIDIA/spark-rapids/pull/4322)|Add an explain only mode to the plugin| +|[#4497](https://github.com/NVIDIA/spark-rapids/pull/4497)|Make better use of pinned memory pool| +|[#4512](https://github.com/NVIDIA/spark-rapids/pull/4512)|remove hadoop version requirement[skip ci]| +|[#4527](https://github.com/NVIDIA/spark-rapids/pull/4527)|Fall back to CPU for regular expressions containing \D or \W| +|[#4525](https://github.com/NVIDIA/spark-rapids/pull/4525)|Properly close data writer in GpuFileFormatWriter| +|[#4502](https://github.com/NVIDIA/spark-rapids/pull/4502)|Removed the redundant test for element_at and fixed the failing one| +|[#4523](https://github.com/NVIDIA/spark-rapids/pull/4523)|Add more integration tests for decimal 128| +|[#3762](https://github.com/NVIDIA/spark-rapids/pull/3762)|Call the right method to convert table from row major <=> col major| +|[#4482](https://github.com/NVIDIA/spark-rapids/pull/4482)|Simplified the construction of zero scalar in GpuUnaryMinus| +|[#4510](https://github.com/NVIDIA/spark-rapids/pull/4510)|Update copyright in NOTICE [skip ci]| +|[#4484](https://github.com/NVIDIA/spark-rapids/pull/4484)|Update GpuFileFormatWriter to stay in sync with recent Spark changes, but still not support writing Hive bucketed table on GPU.| +|[#4492](https://github.com/NVIDIA/spark-rapids/pull/4492)|Fall back to CPU for regular expressions containing hex digits| +|[#4495](https://github.com/NVIDIA/spark-rapids/pull/4495)|Enable approx_percentile by default| +|[#4420](https://github.com/NVIDIA/spark-rapids/pull/4420)|Fix up incorrect results of rounding past the max digits of data type| +|[#4483](https://github.com/NVIDIA/spark-rapids/pull/4483)|Update test case of reading nested unsigned parquet file| +|[#4490](https://github.com/NVIDIA/spark-rapids/pull/4490)|Remove warning about RMM default allocator| +|[#4461](https://github.com/NVIDIA/spark-rapids/pull/4461)|[Audit] Add bucketed scan info in query plan of data source v1| +|[#4489](https://github.com/NVIDIA/spark-rapids/pull/4489)|Add arrays of decimal128 to join tests| +|[#4476](https://github.com/NVIDIA/spark-rapids/pull/4476)|Don't acquire the semaphore for empty input while scanning| +|[#4424](https://github.com/NVIDIA/spark-rapids/pull/4424)|Improve support for regular expression string anchors `\A`, `\Z`, and `\z`| +|[#4491](https://github.com/NVIDIA/spark-rapids/pull/4491)|Skip the test for spark versions 3.1.1, 3.1.2 and 3.2.0 only| +|[#4459](https://github.com/NVIDIA/spark-rapids/pull/4459)|Use merge sort for struct types in non-key columns| +|[#4494](https://github.com/NVIDIA/spark-rapids/pull/4494)|Append new authorized user to blossom-ci whitelist [skip ci]| +|[#4400](https://github.com/NVIDIA/spark-rapids/pull/4400)|Enable approx percentile tests| +|[#4471](https://github.com/NVIDIA/spark-rapids/pull/4471)|Disable orc write by default because of https://issues.apache.org/jira/browse/ORC-1075| 
+|[#4462](https://github.com/NVIDIA/spark-rapids/pull/4462)|Rename DECIMAL_128_FULL and rework usage of TypeSig.gpuNumeric| +|[#4479](https://github.com/NVIDIA/spark-rapids/pull/4479)|Change signoff check image to slim-buster [skip ci]| +|[#4464](https://github.com/NVIDIA/spark-rapids/pull/4464)|Throw SparkArrayIndexOutOfBoundsException for Spark 3.3.0+| +|[#4469](https://github.com/NVIDIA/spark-rapids/pull/4469)|Support repetition of \d and \D in regexp functions| +|[#4472](https://github.com/NVIDIA/spark-rapids/pull/4472)|Modify docs for 22.02 to address issue-4319[skip ci]| +|[#4440](https://github.com/NVIDIA/spark-rapids/pull/4440)|Enable GPU broadcast exchange reuse for DPP when AQE enabled| +|[#4376](https://github.com/NVIDIA/spark-rapids/pull/4376)|Add sequence support| +|[#4460](https://github.com/NVIDIA/spark-rapids/pull/4460)|Abstract the text based PartitionReader| +|[#4383](https://github.com/NVIDIA/spark-rapids/pull/4383)|Fix correctness issue with CASE WHEN with expressions that have side-effects| +|[#4465](https://github.com/NVIDIA/spark-rapids/pull/4465)|Refactor for shims 320+| +|[#4463](https://github.com/NVIDIA/spark-rapids/pull/4463)|Avoid replacing a hash join if build side is unsupported by the join type| +|[#4456](https://github.com/NVIDIA/spark-rapids/pull/4456)|Fix build issues: 1 clean non-exists target dirs; 2 remove duplicated plugin| +|[#4416](https://github.com/NVIDIA/spark-rapids/pull/4416)|Unshim join execs| +|[#4172](https://github.com/NVIDIA/spark-rapids/pull/4172)|Support String to Decimal 128| +|[#4458](https://github.com/NVIDIA/spark-rapids/pull/4458)|Exclude some metadata operators when checking GPU replacement| +|[#4451](https://github.com/NVIDIA/spark-rapids/pull/4451)|Some metrics improvements and timeline reporting| +|[#4435](https://github.com/NVIDIA/spark-rapids/pull/4435)|Disable add profile src execution by default to make the build log clean| +|[#4436](https://github.com/NVIDIA/spark-rapids/pull/4436)|Print error log to stderr output| +|[#4155](https://github.com/NVIDIA/spark-rapids/pull/4155)|Add partial support for line begin and end anchors in regexp_replace| +|[#4428](https://github.com/NVIDIA/spark-rapids/pull/4428)|Exhaustively iterate ColumnarToRow iterator to avoid leaks| +|[#4430](https://github.com/NVIDIA/spark-rapids/pull/4430)|update pca example link in ml-integration.md[skip ci]| +|[#4452](https://github.com/NVIDIA/spark-rapids/pull/4452)|Limit parallelism of nightly tests [skip ci]| +|[#4449](https://github.com/NVIDIA/spark-rapids/pull/4449)|Add recursive type checking and fallback tests for casting array with unsupported element types to string| +|[#4437](https://github.com/NVIDIA/spark-rapids/pull/4437)|Change logInfo to logWarning| +|[#4447](https://github.com/NVIDIA/spark-rapids/pull/4447)|Fix 330 build error and add 322 shims layer| +|[#4417](https://github.com/NVIDIA/spark-rapids/pull/4417)|Fix an Intellij debug issue| +|[#4431](https://github.com/NVIDIA/spark-rapids/pull/4431)|Add DateType support for AST expressions| +|[#4433](https://github.com/NVIDIA/spark-rapids/pull/4433)|Import the right pandas from conda [skip ci]| +|[#4419](https://github.com/NVIDIA/spark-rapids/pull/4419)|Import the right pandas from conda| +|[#4427](https://github.com/NVIDIA/spark-rapids/pull/4427)|Update getFileScanRDD shim for recent changes in Spark 3.3.0| +|[#4397](https://github.com/NVIDIA/spark-rapids/pull/4397)|Ignore cufile.log| +|[#4388](https://github.com/NVIDIA/spark-rapids/pull/4388)|Add support for ReplicateRows| 
+|[#4399](https://github.com/NVIDIA/spark-rapids/pull/4399)|Update docs for Profiling and Qualification tool to change wording| +|[#4407](https://github.com/NVIDIA/spark-rapids/pull/4407)|Fix GpuSubqueryBroadcast on multi-fields relation| +|[#4396](https://github.com/NVIDIA/spark-rapids/pull/4396)|GpuShuffleCoalesceIterator acquire semaphore after host concat| +|[#4361](https://github.com/NVIDIA/spark-rapids/pull/4361)|Accommodate altered semantics of `cudf::lists::contains()`| +|[#4394](https://github.com/NVIDIA/spark-rapids/pull/4394)|Use correct column name in GpuIf test| +|[#4385](https://github.com/NVIDIA/spark-rapids/pull/4385)|Add missing GpuSubqueryBroadcast replacement rule for spark31x | +|[#4387](https://github.com/NVIDIA/spark-rapids/pull/4387)|Fix auto merge conflict 4384[skip ci]| +|[#4374](https://github.com/NVIDIA/spark-rapids/pull/4374)|Fix the IT module depends on the tests module| +|[#4365](https://github.com/NVIDIA/spark-rapids/pull/4365)|Not publishing integration_tests jar to Maven Central [skip ci]| +|[#4358](https://github.com/NVIDIA/spark-rapids/pull/4358)|Update GpuIf to support expressions with side effects| +|[#4382](https://github.com/NVIDIA/spark-rapids/pull/4382)|Remove unused scallop dependency from integration_tests| +|[#4364](https://github.com/NVIDIA/spark-rapids/pull/4364)|Replace Scala document with Scala comment for inner functions| +|[#4373](https://github.com/NVIDIA/spark-rapids/pull/4373)|Add pytest tags for nightly test parallel run [skip ci]| +|[#4150](https://github.com/NVIDIA/spark-rapids/pull/4150)|Support GpuSubqueryBroadcast for DPP| +|[#4372](https://github.com/NVIDIA/spark-rapids/pull/4372)|Move casting to string tests from array_test.py and struct_test.py to cast_test.py| +|[#4371](https://github.com/NVIDIA/spark-rapids/pull/4371)|Fix typo in skipTestsFor330 calculation [skip ci]| +|[#4355](https://github.com/NVIDIA/spark-rapids/pull/4355)|Dedicated deploy-file with reduced pom in nightly build [skip ci]| +|[#4352](https://github.com/NVIDIA/spark-rapids/pull/4352)|Revert "Ignore failing string to timestamp tests temporarily (#4197)"| +|[#4359](https://github.com/NVIDIA/spark-rapids/pull/4359)|Audit - SPARK-37268 - Remove unused variable in GpuFileScanRDD [Databricks]| +|[#4327](https://github.com/NVIDIA/spark-rapids/pull/4327)|Print meaningful message when calling scripts in maven| +|[#4354](https://github.com/NVIDIA/spark-rapids/pull/4354)|Fix regression in AQE optimizations| +|[#4343](https://github.com/NVIDIA/spark-rapids/pull/4343)|Fix issue with binding to hash agg columns with computation| +|[#4285](https://github.com/NVIDIA/spark-rapids/pull/4285)|Add support for regexp_extract on the GPU| +|[#4349](https://github.com/NVIDIA/spark-rapids/pull/4349)|Fix PYTHONPATH in pre-merge| +|[#4269](https://github.com/NVIDIA/spark-rapids/pull/4269)|The option for the nightly script not deploying jars [skip ci]| +|[#4335](https://github.com/NVIDIA/spark-rapids/pull/4335)|Fix the issue of exporting Column RDD| +|[#4336](https://github.com/NVIDIA/spark-rapids/pull/4336)|Split expensive pytest files in cases level [skip ci]| +|[#4328](https://github.com/NVIDIA/spark-rapids/pull/4328)|Change the explanation of why the operator will not work on GPU| +|[#4338](https://github.com/NVIDIA/spark-rapids/pull/4338)|Use scala Int.box instead of Integer constructors | +|[#4340](https://github.com/NVIDIA/spark-rapids/pull/4340)|Remove the unnecessary parameter `dataType` in `resolveColumnVector` method| 
+|[#4256](https://github.com/NVIDIA/spark-rapids/pull/4256)|Allow returning an EmptyHashedRelation when a broadcast result is empty| +|[#4333](https://github.com/NVIDIA/spark-rapids/pull/4333)|Add tests about writing empty table to ORC/PARQUET| +|[#4337](https://github.com/NVIDIA/spark-rapids/pull/4337)|Support GpuFirst and GpuLast on nested types under reduction aggregations| +|[#4331](https://github.com/NVIDIA/spark-rapids/pull/4331)|Fix parquet options builder calls| +|[#4310](https://github.com/NVIDIA/spark-rapids/pull/4310)|Fix typo in shim class name| +|[#4326](https://github.com/NVIDIA/spark-rapids/pull/4326)|Fix 4315 decrease concurrentGpuTasks to avoid sum test OOM| +|[#4266](https://github.com/NVIDIA/spark-rapids/pull/4266)|Check revisions for all shim jars while build all| +|[#4282](https://github.com/NVIDIA/spark-rapids/pull/4282)|Use data type to create an inspector for a foldable GPU expression.| +|[#3144](https://github.com/NVIDIA/spark-rapids/pull/3144)|Optimize AQE with Spark 3.2+ to avoid redundant transitions| +|[#4317](https://github.com/NVIDIA/spark-rapids/pull/4317)|[BUG] Update nightly test script to dynamically set mem_fraction [skip ci]| +|[#4206](https://github.com/NVIDIA/spark-rapids/pull/4206)|Porting GpuRowToColumnar converters to InternalColumnarRDDConverter| +|[#4272](https://github.com/NVIDIA/spark-rapids/pull/4272)|Full support for SUM overflow detection on decimal| +|[#4255](https://github.com/NVIDIA/spark-rapids/pull/4255)|Make regexp pattern `[^a]` consistent with Spark for multiline strings| +|[#4306](https://github.com/NVIDIA/spark-rapids/pull/4306)|Revert commonizing the int96ParquetRebase* functions | +|[#4299](https://github.com/NVIDIA/spark-rapids/pull/4299)|Fix auto merge conflict 4298 [skip ci]| +|[#4159](https://github.com/NVIDIA/spark-rapids/pull/4159)|Optimize sample perf| +|[#4235](https://github.com/NVIDIA/spark-rapids/pull/4235)|Commonize v2 shim| +|[#4274](https://github.com/NVIDIA/spark-rapids/pull/4274)|Add tests for timestamps that overflowed before.| +|[#4271](https://github.com/NVIDIA/spark-rapids/pull/4271)|Skip test_regexp_replace_null_pattern_fallback on Spark 3.1.1 and later| +|[#4278](https://github.com/NVIDIA/spark-rapids/pull/4278)|Use mamba for cudf conda install [skip ci]| +|[#4270](https://github.com/NVIDIA/spark-rapids/pull/4270)|Document exponent differences when casting floating point to string [skip ci]| +|[#4268](https://github.com/NVIDIA/spark-rapids/pull/4268)|Fix merge conflict with branch-21.12| +|[#4093](https://github.com/NVIDIA/spark-rapids/pull/4093)|Add tests for regexp() and regexp_like()| +|[#4259](https://github.com/NVIDIA/spark-rapids/pull/4259)|fix regression in cast from string to float that caused signed NaN to be considered valid| +|[#4241](https://github.com/NVIDIA/spark-rapids/pull/4241)|fix bug in parsing regex character classes that start with `^` and contain an unescaped `]`| +|[#4224](https://github.com/NVIDIA/spark-rapids/pull/4224)|Support row-based Hive UDFs| +|[#4221](https://github.com/NVIDIA/spark-rapids/pull/4221)|GpuCast from ArrayType to StringType| +|[#4007](https://github.com/NVIDIA/spark-rapids/pull/4007)|Implement duplicate key handling for GpuCreateMap| +|[#4251](https://github.com/NVIDIA/spark-rapids/pull/4251)|Skip test_regexp_replace_null_pattern_fallback on Databricks| +|[#4247](https://github.com/NVIDIA/spark-rapids/pull/4247)|Disable failing CastOpSuite test| +|[#4239](https://github.com/NVIDIA/spark-rapids/pull/4239)|Make EOL anchor behavior match CPU for strings ending with 
newline| +|[#4153](https://github.com/NVIDIA/spark-rapids/pull/4153)|Regexp: Only transpile once per expression rather than once per batch| +|[#4230](https://github.com/NVIDIA/spark-rapids/pull/4230)|Change to build tools module with all the versions by default| +|[#4223](https://github.com/NVIDIA/spark-rapids/pull/4223)|Fixes a minor deprecation warning| +|[#4215](https://github.com/NVIDIA/spark-rapids/pull/4215)|Rebalance testing load| +|[#4214](https://github.com/NVIDIA/spark-rapids/pull/4214)|Fix pre_merge ci_2 [skip ci]| +|[#4212](https://github.com/NVIDIA/spark-rapids/pull/4212)|Remove an unused method with its outdated comment| +|[#4211](https://github.com/NVIDIA/spark-rapids/pull/4211)|Update test_floor_ceil_overflow to be more lenient on exception type| +|[#4203](https://github.com/NVIDIA/spark-rapids/pull/4203)|Move all the GpuShuffleExchangeExec shim v2 classes to org.apache.spark| +|[#4193](https://github.com/NVIDIA/spark-rapids/pull/4193)|Rename 311until320-apache to 311until320-noncdh| +|[#4197](https://github.com/NVIDIA/spark-rapids/pull/4197)|Ignore failing string to timestamp tests temporarily| +|[#4160](https://github.com/NVIDIA/spark-rapids/pull/4160)|Fix merge issues for branch 22.02| +|[#4081](https://github.com/NVIDIA/spark-rapids/pull/4081)|Convert String to DecimalType without casting to FloatType| +|[#4132](https://github.com/NVIDIA/spark-rapids/pull/4132)|Fix auto merge conflict 4131 [skip ci]| +|[#4099](https://github.com/NVIDIA/spark-rapids/pull/4099)|[REVIEW] Init version 22.02.0| +|[#4113](https://github.com/NVIDIA/spark-rapids/pull/4113)|Fix pre-merge CI 2 conditions [skip ci]| +|[#4064](https://github.com/NVIDIA/spark-rapids/pull/4064)|Regex: transpile `.` to `[^\r\n]` in cuDF| +|[#4044](https://github.com/NVIDIA/spark-rapids/pull/4044)|RLike: Fall back to CPU for regex that would produce incorrect results| ## Release 21.12 ### Features ||| |:---|:---| +|[#1571](https://github.com/NVIDIA/spark-rapids/issues/1571)|[FEA] Better precision range for decimal multiply, and possibly others| |[#3953](https://github.com/NVIDIA/spark-rapids/issues/3953)|[FEA] Audit: Add array support to union by name | |[#4085](https://github.com/NVIDIA/spark-rapids/issues/4085)|[FEA] Decimal 128 Support: Concat| |[#4073](https://github.com/NVIDIA/spark-rapids/issues/4073)|[FEA] Decimal 128 Support: MapKeys, MapValues, MapEntries| @@ -106,6 +401,11 @@ Generated on 2021-12-07 ### PRs ||| |:---|:---| +|[#4362](https://github.com/NVIDIA/spark-rapids/pull/4362)|Decimal128 support for Parquet| +|[#4391](https://github.com/NVIDIA/spark-rapids/pull/4391)|update gcp custom dataproc image version to avoid log4j issue[skip ci]| +|[#4379](https://github.com/NVIDIA/spark-rapids/pull/4379)|update hot fix cudf link v21.12.2| +|[#4367](https://github.com/NVIDIA/spark-rapids/pull/4367)|update 21.12 branch for doc [skip ci]| +|[#4245](https://github.com/NVIDIA/spark-rapids/pull/4245)|Update changelog 21.12 to latest [skip ci]| |[#4258](https://github.com/NVIDIA/spark-rapids/pull/4258)|Sanitize column names in ParquetCachedBatchSerializer before writing to Parquet| |[#4308](https://github.com/NVIDIA/spark-rapids/pull/4308)|Bump up GPU reserve memory to 640MB| |[#4307](https://github.com/NVIDIA/spark-rapids/pull/4307)|Update Download page for 21.12 [skip ci]| @@ -1228,1322 +1528,5 @@ Generated on 2021-12-07 |[#2402](https://github.com/NVIDIA/spark-rapids/pull/2402)|Add profiling tool| |[#2313](https://github.com/NVIDIA/spark-rapids/pull/2313)|Supports `GpuLiteral` of array type| -## Release 0.5 - -### 
Features -||| -|:---|:---| -|[#938](https://github.com/NVIDIA/spark-rapids/issues/938)|[FEA] Have hashed shuffle match spark| -|[#1604](https://github.com/NVIDIA/spark-rapids/issues/1604)|[FEA] Support casting structs to strings | -|[#1920](https://github.com/NVIDIA/spark-rapids/issues/1920)|[FEA] Support murmur3 hashing of structs| -|[#2018](https://github.com/NVIDIA/spark-rapids/issues/2018)|[FEA] A way for user to find out the plugin version and cudf version in REPL| -|[#77](https://github.com/NVIDIA/spark-rapids/issues/77)|[FEA] Support ArrayContains| -|[#1721](https://github.com/NVIDIA/spark-rapids/issues/1721)|[FEA] build cudf jars with NVTX enabled| -|[#1782](https://github.com/NVIDIA/spark-rapids/issues/1782)|[FEA] Shim layers to support spark versions| -|[#1625](https://github.com/NVIDIA/spark-rapids/issues/1625)|[FEA] Support Decimal Casts to String and String to Decimal| -|[#166](https://github.com/NVIDIA/spark-rapids/issues/166)|[FEA] Support get_json_object| -|[#1698](https://github.com/NVIDIA/spark-rapids/issues/1698)|[FEA] Support casting structs to string| -|[#1912](https://github.com/NVIDIA/spark-rapids/issues/1912)|[FEA] Let `Scalar Pandas UDF ` support array of struct type.| -|[#1136](https://github.com/NVIDIA/spark-rapids/issues/1136)|[FEA] Audit: Script to list commits between different Spark versions/tags| -|[#1921](https://github.com/NVIDIA/spark-rapids/issues/1921)|[FEA] cudf version check should be lenient on later patch version| -|[#19](https://github.com/NVIDIA/spark-rapids/issues/19)|[FEA] Out of core sorts| - -### Performance -||| -|:---|:---| -|[#2090](https://github.com/NVIDIA/spark-rapids/issues/2090)|[FEA] Make row count estimates available to the cost-based optimizer| -|[#1341](https://github.com/NVIDIA/spark-rapids/issues/1341)|Optimize unnecessary columnar->row->columnar transitions with AQE| -|[#1558](https://github.com/NVIDIA/spark-rapids/issues/1558)|[FEA] Initialize UCX early| -|[#1633](https://github.com/NVIDIA/spark-rapids/issues/1633)|[FEA] Implement a cost-based optimizer| -|[#1727](https://github.com/NVIDIA/spark-rapids/issues/1727)|[FEA] Put RangePartitioner data path on the GPU| - -### Bugs Fixed -||| -|:---|:---| -|[#2279](https://github.com/NVIDIA/spark-rapids/issues/2279)|[BUG] Hash Partitioning can fail for very small batches| -|[#2314](https://github.com/NVIDIA/spark-rapids/issues/2314)|[BUG] v0.5.0 pre-release pytests join_test.py::test_hash_join_array FAILED on SPARK-EGX Yarn Cluster| -|[#2317](https://github.com/NVIDIA/spark-rapids/issues/2317)|[BUG] GpuColumnarToRowIterator can stop after receiving an empty batch| -|[#2244](https://github.com/NVIDIA/spark-rapids/issues/2244)|[BUG] Executors hanging when running NDS benchmarks| -|[#2278](https://github.com/NVIDIA/spark-rapids/issues/2278)|[BUG] FullOuter join can produce too many results| -|[#2220](https://github.com/NVIDIA/spark-rapids/issues/2220)|[BUG] csv_test.py::test_csv_fallback FAILED on the EMR Cluster| -|[#2225](https://github.com/NVIDIA/spark-rapids/issues/2225)|[BUG] GpuSort fails on tables containing arrays.| -|[#2232](https://github.com/NVIDIA/spark-rapids/issues/2232)|[BUG] hash_aggregate_test.py::test_hash_grpby_pivot FAILED on the Databricks Cluster| -|[#2231](https://github.com/NVIDIA/spark-rapids/issues/2231)|[BUG]string_test.py::test_re_replace FAILED on the Dataproc Cluster| -|[#2042](https://github.com/NVIDIA/spark-rapids/issues/2042)|[BUG] NDS q14a fails with "GpuColumnarToRow does not implement doExecuteBroadcast"| 
-|[#2203](https://github.com/NVIDIA/spark-rapids/issues/2203)|[BUG] Spark nightly cache tests fail with -- master flag| -|[#2230](https://github.com/NVIDIA/spark-rapids/issues/2230)|[BUG] qa_nightly_select_test.py::test_select FAILED on the Dataproc Cluster| -|[#1711](https://github.com/NVIDIA/spark-rapids/issues/1711)|[BUG] find a way to stop allocating from RMM on the shuffle-client thread| -|[#2109](https://github.com/NVIDIA/spark-rapids/issues/2109)|[BUG] Fix high priority violations detected by code analysis tools| -|[#2217](https://github.com/NVIDIA/spark-rapids/issues/2217)|[BUG] qa_nightly_select_test failure in test_select | -|[#2127](https://github.com/NVIDIA/spark-rapids/issues/2127)|[BUG] Parsing with two-digit year should fall back to CPU| -|[#2078](https://github.com/NVIDIA/spark-rapids/issues/2078)|[BUG] java.lang.ArithmeticException: divide by zero when spark.sql.ansi.enabled=true| -|[#2048](https://github.com/NVIDIA/spark-rapids/issues/2048)|[BUG] split function+ repartition result in "ai.rapids.cudf.CudaException: device-side assert triggered"| -|[#2036](https://github.com/NVIDIA/spark-rapids/issues/2036)|[BUG] Stackoverflow when writing wide parquet files.| -|[#1973](https://github.com/NVIDIA/spark-rapids/issues/1973)|[BUG] generate_expr_test FAILED on Dataproc Cluster| -|[#2079](https://github.com/NVIDIA/spark-rapids/issues/2079)|[BUG] koalas.sql fails with java.lang.ArrayIndexOutOfBoundsException| -|[#217](https://github.com/NVIDIA/spark-rapids/issues/217)|[BUG] CudaUtil should be removed| -|[#1550](https://github.com/NVIDIA/spark-rapids/issues/1550)|[BUG] The ORC output data of a query is not readable| -|[#2074](https://github.com/NVIDIA/spark-rapids/issues/2074)|[BUG] Intermittent NPE in RapidsBufferCatalog when running test suite| -|[#2027](https://github.com/NVIDIA/spark-rapids/issues/2027)|[BUG] udf_cudf_test.py integration tests fail | -|[#1899](https://github.com/NVIDIA/spark-rapids/issues/1899)|[BUG] Some queries fail when cost-based optimizations are enabled| -|[#1914](https://github.com/NVIDIA/spark-rapids/issues/1914)|[BUG] Add in float, double, timestamp, and date support to murmur3| -|[#2014](https://github.com/NVIDIA/spark-rapids/issues/2014)|[BUG] earlyStart option added in 0.5 can cause errors when starting UCX| -|[#1984](https://github.com/NVIDIA/spark-rapids/issues/1984)|[BUG] NDS q58 Decimal scale (59) cannot be greater than precision (38).| -|[#2001](https://github.com/NVIDIA/spark-rapids/issues/2001)|[BUG] RapidsShuffleManager didn't pass `dirs` to `getBlockData` from a wrapped `ShuffleBlockResolver`| -|[#1797](https://github.com/NVIDIA/spark-rapids/issues/1797)|[BUG] occasional crashes in CI| -|[#1861](https://github.com/NVIDIA/spark-rapids/issues/1861)|Encountered column data outside the range of input buffer| -|[#1905](https://github.com/NVIDIA/spark-rapids/issues/1905)|[BUG] Large concat task time in GpuShuffleCoalesce with pinned memory pool| -|[#1638](https://github.com/NVIDIA/spark-rapids/issues/1638)|[BUG] Tests `test_window_aggs_for_rows_collect_list` fails when there are null values in columns.| -|[#1864](https://github.com/NVIDIA/spark-rapids/issues/1864)|[BUG]HostColumnarToGPU inefficient when only doing count()| -|[#1862](https://github.com/NVIDIA/spark-rapids/issues/1862)|[BUG] spark 3.2.0-snapshot integration test failed due to conf change| -|[#1844](https://github.com/NVIDIA/spark-rapids/issues/1844)|[BUG] branch-0.5 nightly IT FAILED on the The mortgage ETL test "Could not read footer for file: file:/xxx/xxx.snappy.parquet"| 
-|[#1627](https://github.com/NVIDIA/spark-rapids/issues/1627)|[BUG] GDS exception when restoring spilled buffer| -|[#1802](https://github.com/NVIDIA/spark-rapids/issues/1802)|[BUG] Many decimal integration test failures for 0.5| - -### PRs -||| -|:---|:---| -|[#2326](https://github.com/NVIDIA/spark-rapids/pull/2326)|Update changelog for 0.5.0 release| -|[#2316](https://github.com/NVIDIA/spark-rapids/pull/2316)|Update doc to note that single quoted json strings are not ok| -|[#2319](https://github.com/NVIDIA/spark-rapids/pull/2319)|Disable hash partitioning on arrays| -|[#2318](https://github.com/NVIDIA/spark-rapids/pull/2318)|Fix ColumnarToRowIterator handling of empty batches| -|[#2304](https://github.com/NVIDIA/spark-rapids/pull/2304)|Update CHANGELOG.md| -|[#2301](https://github.com/NVIDIA/spark-rapids/pull/2301)|Update doc to reflect nanosleep problem with 460.32.03| -|[#2298](https://github.com/NVIDIA/spark-rapids/pull/2298)|Update changelog for v0.5.0 release [skip ci]| -|[#2293](https://github.com/NVIDIA/spark-rapids/pull/2293)|update cudf version to 0.19.2| -|[#2289](https://github.com/NVIDIA/spark-rapids/pull/2289)|Update docs to warn against 450.80.02 driver with 10.x toolkit| -|[#2285](https://github.com/NVIDIA/spark-rapids/pull/2285)|Require single batch for full outer join streaming| -|[#2281](https://github.com/NVIDIA/spark-rapids/pull/2281)|Remove download section for unreleased 0.4.2| -|[#2264](https://github.com/NVIDIA/spark-rapids/pull/2264)|Add spark312 and spark320 versions of cache serializer| -|[#2254](https://github.com/NVIDIA/spark-rapids/pull/2254)|updated gcp docs with custom dataproc image instructions| -|[#2247](https://github.com/NVIDIA/spark-rapids/pull/2247)|Allow specifying a superclass for non-GPU execs| -|[#2235](https://github.com/NVIDIA/spark-rapids/pull/2235)|Fix distributed cache to read requested schema | -|[#2261](https://github.com/NVIDIA/spark-rapids/pull/2261)|Make CBO row count test more robust| -|[#2237](https://github.com/NVIDIA/spark-rapids/pull/2237)|update cudf version to 0.19.1| -|[#2240](https://github.com/NVIDIA/spark-rapids/pull/2240)|Get the correct 'PIPESTATUS' in bash [skip ci]| -|[#2242](https://github.com/NVIDIA/spark-rapids/pull/2242)|Add shuffle doc section on the periodicGC configuration| -|[#2251](https://github.com/NVIDIA/spark-rapids/pull/2251)|Fix issue when out of core sorting nested data types| -|[#2204](https://github.com/NVIDIA/spark-rapids/pull/2204)|Run nightly tests for ParquetCachedBatchSerializer| -|[#2245](https://github.com/NVIDIA/spark-rapids/pull/2245)|Fix pivot bug for decimalType| -|[#2093](https://github.com/NVIDIA/spark-rapids/pull/2093)|Initial implementation of row count estimates in cost-based optimizer| -|[#2188](https://github.com/NVIDIA/spark-rapids/pull/2188)|Support GPU broadcast exchange reuse to feed CPU BHJ when AQE is enabled| -|[#2227](https://github.com/NVIDIA/spark-rapids/pull/2227)|ParquetCachedBatchSerializer broadcast AllConfs instead of SQLConf to fix distributed mode| -|[#2223](https://github.com/NVIDIA/spark-rapids/pull/2223)|Adds subquery aggregate tests from SPARK-31620| -|[#2222](https://github.com/NVIDIA/spark-rapids/pull/2222)|Remove groupId already specified in parent pom| -|[#2209](https://github.com/NVIDIA/spark-rapids/pull/2209)|Fixed a few issues with out of core sort| -|[#2218](https://github.com/NVIDIA/spark-rapids/pull/2218)|Fix incorrect RegExpReplace children handling on Spark 3.1+| -|[#2207](https://github.com/NVIDIA/spark-rapids/pull/2207)|fix batch size default values in 
the tuning guide| -|[#2208](https://github.com/NVIDIA/spark-rapids/pull/2208)|Revert "add nightly cache tests (#2083)"| -|[#2206](https://github.com/NVIDIA/spark-rapids/pull/2206)|Fix shim301db build| -|[#2192](https://github.com/NVIDIA/spark-rapids/pull/2192)|Fix index-based access to the head elements| -|[#2210](https://github.com/NVIDIA/spark-rapids/pull/2210)|Avoid redundant collection conversions| -|[#2190](https://github.com/NVIDIA/spark-rapids/pull/2190)|JNI fixes for StringWordCount native UDF example| -|[#2086](https://github.com/NVIDIA/spark-rapids/pull/2086)|Updating documentation for data format support| -|[#2172](https://github.com/NVIDIA/spark-rapids/pull/2172)|Remove easy unused symbols| -|[#2089](https://github.com/NVIDIA/spark-rapids/pull/2089)|Update PandasUDF doc| -|[#2195](https://github.com/NVIDIA/spark-rapids/pull/2195)|fix cudf 0.19.0 download link [skip ci]| -|[#2175](https://github.com/NVIDIA/spark-rapids/pull/2175)|Branch 0.5 doc update| -|[#2168](https://github.com/NVIDIA/spark-rapids/pull/2168)|Simplify GpuExpressions w/ withResourceIfAllowed| -|[#2055](https://github.com/NVIDIA/spark-rapids/pull/2055)|Support PivotFirst| -|[#2183](https://github.com/NVIDIA/spark-rapids/pull/2183)|GpuParquetScan#readBufferToTable remove dead code| -|[#2129](https://github.com/NVIDIA/spark-rapids/pull/2129)|Fall back to CPU when parsing two-digit years| -|[#2083](https://github.com/NVIDIA/spark-rapids/pull/2083)|add nightly cache tests| -|[#2151](https://github.com/NVIDIA/spark-rapids/pull/2151)|add corresponding close call for HostMemoryOutputStream| -|[#2169](https://github.com/NVIDIA/spark-rapids/pull/2169)|Work around bug in Spark for integration test| -|[#2130](https://github.com/NVIDIA/spark-rapids/pull/2130)|Fix divide-by-zero in GpuAverage with ansi mode| -|[#2149](https://github.com/NVIDIA/spark-rapids/pull/2149)|Auto generate the supported types for the file formats| -|[#2072](https://github.com/NVIDIA/spark-rapids/pull/2072)|Disable CSV parsing by default and update tests to better show what is left| -|[#2157](https://github.com/NVIDIA/spark-rapids/pull/2157)|fix merge conflict for 0.4.2 [skip ci]| -|[#2144](https://github.com/NVIDIA/spark-rapids/pull/2144)|Allow array and struct types to pass thru when doing join| -|[#2145](https://github.com/NVIDIA/spark-rapids/pull/2145)|Avoid GPU shuffle for round-robin of unsortable types| -|[#2021](https://github.com/NVIDIA/spark-rapids/pull/2021)|Add in support for murmur3 hashing of structs| -|[#2128](https://github.com/NVIDIA/spark-rapids/pull/2128)|Add in Partition type check support| -|[#2116](https://github.com/NVIDIA/spark-rapids/pull/2116)|Add dynamic Spark configuration for Databricks| -|[#2132](https://github.com/NVIDIA/spark-rapids/pull/2132)|Log plugin and cudf versions on startup| -|[#2135](https://github.com/NVIDIA/spark-rapids/pull/2135)|Disable Spark 3.2 shim by default| -|[#2125](https://github.com/NVIDIA/spark-rapids/pull/2125)|enable auto-merge from 0.5 to 0.6 [skip ci]| -|[#2120](https://github.com/NVIDIA/spark-rapids/pull/2120)|Materialize Stream before serialization| -|[#2119](https://github.com/NVIDIA/spark-rapids/pull/2119)|Add more comprehensive documentation on supported date formats| -|[#1717](https://github.com/NVIDIA/spark-rapids/pull/1717)|Decimal32 support| -|[#2114](https://github.com/NVIDIA/spark-rapids/pull/2114)|Modified the Download page for 0.4.1 and updated doc to point to K8s guide| -|[#2106](https://github.com/NVIDIA/spark-rapids/pull/2106)|Fix some buffer leaks| 
-|[#2097](https://github.com/NVIDIA/spark-rapids/pull/2097)|fix the bound row project empty issue in row frame| -|[#2099](https://github.com/NVIDIA/spark-rapids/pull/2099)|Remove verbose log prints to make the build/test log clean| -|[#2105](https://github.com/NVIDIA/spark-rapids/pull/2105)|Cleanup prior Spark sessions in tests consistently| -|[#2104](https://github.com/NVIDIA/spark-rapids/pull/2104)| Clone apache spark source code to parse the git commit IDs| -|[#2095](https://github.com/NVIDIA/spark-rapids/pull/2095)|fix refcount when materializing device buffer from GDS| -|[#2100](https://github.com/NVIDIA/spark-rapids/pull/2100)|[BUG] add wget for fetching conda [skip ci]| -|[#2096](https://github.com/NVIDIA/spark-rapids/pull/2096)|Adjust images for integration tests| -|[#2094](https://github.com/NVIDIA/spark-rapids/pull/2094)|Changed name of parquet files for Mortgage ETL Integration test| -|[#2035](https://github.com/NVIDIA/spark-rapids/pull/2035)|Accelerate data transfer for map Pandas UDF plan| -|[#2050](https://github.com/NVIDIA/spark-rapids/pull/2050)|stream shuffle buffers from GDS to UCX| -|[#2084](https://github.com/NVIDIA/spark-rapids/pull/2084)|Enable ORC write by default| -|[#2088](https://github.com/NVIDIA/spark-rapids/pull/2088)|Upgrade ScalaTest plugin to respect JAVA_HOME| -|[#1932](https://github.com/NVIDIA/spark-rapids/pull/1932)|Create a getting started on K8s page| -|[#2080](https://github.com/NVIDIA/spark-rapids/pull/2080)|Improve error message after failed RMM shutdown| -|[#2064](https://github.com/NVIDIA/spark-rapids/pull/2064)|Optimize unnecessary columnar->row->columnar transitions with AQE| -|[#2025](https://github.com/NVIDIA/spark-rapids/pull/2025)|Update the doc for pandas udf on databricks| -|[#2059](https://github.com/NVIDIA/spark-rapids/pull/2059)|Add the flag 'TEST_TYPE' to avoid integration tests silently skipping some test cases| -|[#2075](https://github.com/NVIDIA/spark-rapids/pull/2075)|Remove debug println from CBO test| -|[#2046](https://github.com/NVIDIA/spark-rapids/pull/2046)|support casting Decimal to String| -|[#1812](https://github.com/NVIDIA/spark-rapids/pull/1812)|allow spilled buffers to be unspilled| -|[#2061](https://github.com/NVIDIA/spark-rapids/pull/2061)|Run the pandas udf using cudf on Databricks| -|[#1893](https://github.com/NVIDIA/spark-rapids/pull/1893)|Plug-in support for get_json_object| -|[#2044](https://github.com/NVIDIA/spark-rapids/pull/2044)|Use partition for GPU hash partitioning| -|[#1954](https://github.com/NVIDIA/spark-rapids/pull/1954)|Fix CBO bug where incompatible plans were produced with AQE on| -|[#2049](https://github.com/NVIDIA/spark-rapids/pull/2049)|Remove incompatable int overflow checking| -|[#2056](https://github.com/NVIDIA/spark-rapids/pull/2056)|Remove Spark 3.2 from premerge and nightly CI run| -|[#1814](https://github.com/NVIDIA/spark-rapids/pull/1814)|Struct to string casting functionality| -|[#2037](https://github.com/NVIDIA/spark-rapids/pull/2037)|Fix warnings from use of deprecated cudf methods| -|[#2033](https://github.com/NVIDIA/spark-rapids/pull/2033)|Bump up pre-merge OS from ubuntu 16 to ubuntu 18 [skip ci]| -|[#1883](https://github.com/NVIDIA/spark-rapids/pull/1883)|Enable sort for single-level nesting struct columns on GPU| -|[#2016](https://github.com/NVIDIA/spark-rapids/pull/2016)|Refactor logic for parallel testing| -|[#2022](https://github.com/NVIDIA/spark-rapids/pull/2022)|Update order by to not load native libraries when sorting| 
-|[#2017](https://github.com/NVIDIA/spark-rapids/pull/2017)|Add in murmur3 support for float, double, date and timestamp| -|[#1981](https://github.com/NVIDIA/spark-rapids/pull/1981)|Fix GpuSize| -|[#1999](https://github.com/NVIDIA/spark-rapids/pull/1999)|support casting string to decimal| -|[#2006](https://github.com/NVIDIA/spark-rapids/pull/2006)|Enable windowed `collect_list` by default| -|[#2000](https://github.com/NVIDIA/spark-rapids/pull/2000)|Use Spark's HybridRowQueue to avoid MemoryConsumer API shim| -|[#2015](https://github.com/NVIDIA/spark-rapids/pull/2015)|Fix bug where rkey buffer is getting advanced after the first handshake| -|[#2007](https://github.com/NVIDIA/spark-rapids/pull/2007)|Fix unknown column name error when filtering ORC file with no names| -|[#2005](https://github.com/NVIDIA/spark-rapids/pull/2005)|Update to new is_before_spark_311 function name| -|[#1944](https://github.com/NVIDIA/spark-rapids/pull/1944)|Support running scalar pandas UDF with array type.| -|[#1991](https://github.com/NVIDIA/spark-rapids/pull/1991)|Fixes creation of invalid DecimalType in GpuDivide.tagExprForGpu| -|[#1958](https://github.com/NVIDIA/spark-rapids/pull/1958)|Support legacy behavior of parameterless count | -|[#1919](https://github.com/NVIDIA/spark-rapids/pull/1919)|Add support for Structs for UnionExec| -|[#2002](https://github.com/NVIDIA/spark-rapids/pull/2002)|Pass dirs to getBlockData for a wrapped shuffle resolver| -|[#1983](https://github.com/NVIDIA/spark-rapids/pull/1983)|document building against different CUDA Toolkit versions| -|[#1994](https://github.com/NVIDIA/spark-rapids/pull/1994)|Merge 0.4 to 0.5 [skip ci]| -|[#1982](https://github.com/NVIDIA/spark-rapids/pull/1982)|Update ORC pushdown filter building to latest Spark logic| -|[#1978](https://github.com/NVIDIA/spark-rapids/pull/1978)|Add audit script to list commits from Spark| -|[#1976](https://github.com/NVIDIA/spark-rapids/pull/1976)|Temp fix for parquet write changes| -|[#1970](https://github.com/NVIDIA/spark-rapids/pull/1970)|add maven profiles for supported CUDA versions| -|[#1951](https://github.com/NVIDIA/spark-rapids/pull/1951)|Branch 0.5 doc remove numpartitions| -|[#1967](https://github.com/NVIDIA/spark-rapids/pull/1967)|Update FAQ for Dataset API and format supported versions| -|[#1972](https://github.com/NVIDIA/spark-rapids/pull/1972)|support GpuSize| -|[#1966](https://github.com/NVIDIA/spark-rapids/pull/1966)|add xml report for codecov| -|[#1955](https://github.com/NVIDIA/spark-rapids/pull/1955)|Fix typo in Arrow optimization config| -|[#1956](https://github.com/NVIDIA/spark-rapids/pull/1956)|Fix NPE in plugin shutdown| -|[#1930](https://github.com/NVIDIA/spark-rapids/pull/1930)|Relax cudf version check for patch-level versions| -|[#1787](https://github.com/NVIDIA/spark-rapids/pull/1787)|support distributed file path in cloud environment| -|[#1961](https://github.com/NVIDIA/spark-rapids/pull/1961)|change premege GPU_TYPE from secret to global env [skip ci]| -|[#1957](https://github.com/NVIDIA/spark-rapids/pull/1957)|Update Spark 3.1.2 shim for float upcast behavior| -|[#1889](https://github.com/NVIDIA/spark-rapids/pull/1889)|Decimal DIV changes | -|[#1947](https://github.com/NVIDIA/spark-rapids/pull/1947)|Move doc of Pandas UDF to additional-functionality| -|[#1938](https://github.com/NVIDIA/spark-rapids/pull/1938)|Add spark.executor.resource.gpu.amount=1 to YARN and K8s docs| -|[#1937](https://github.com/NVIDIA/spark-rapids/pull/1937)|Fix merge conflict with branch-0.4| 
-|[#1878](https://github.com/NVIDIA/spark-rapids/pull/1878)|spillable cache for GpuCartesianRDD| -|[#1843](https://github.com/NVIDIA/spark-rapids/pull/1843)|Refactor GpuGenerateExec and Explode| -|[#1933](https://github.com/NVIDIA/spark-rapids/pull/1933)|Split DB scripts to make them common for the build and IT pipeline| -|[#1935](https://github.com/NVIDIA/spark-rapids/pull/1935)|Update Alias SQL quoting and float-to-timestamp casting to match Spark 3.2| -|[#1926](https://github.com/NVIDIA/spark-rapids/pull/1926)|Consolidate RAT settings in parent pom| -|[#1918](https://github.com/NVIDIA/spark-rapids/pull/1918)|Minor code cleanup in dateTImeExpressions| -|[#1906](https://github.com/NVIDIA/spark-rapids/pull/1906)|Remove get call on timeZoneId| -|[#1908](https://github.com/NVIDIA/spark-rapids/pull/1908)|Remove the Scala version of Mortgage ETL tests from nightly test| -|[#1894](https://github.com/NVIDIA/spark-rapids/pull/1894)|Modified Download Page to re-order the items and change the format of download links| -|[#1909](https://github.com/NVIDIA/spark-rapids/pull/1909)|Avoid pinned memory for shuffle host buffers| -|[#1891](https://github.com/NVIDIA/spark-rapids/pull/1891)|Connect UCX endpoints early during app startup| -|[#1877](https://github.com/NVIDIA/spark-rapids/pull/1877)|remove docker build in pre-merge [skip ci]| -|[#1830](https://github.com/NVIDIA/spark-rapids/pull/1830)|Enable the tests for collect over window.| -|[#1882](https://github.com/NVIDIA/spark-rapids/pull/1882)|GpuArrowColumnarBatchBuilder retains the references of ArrowBuf until HostToGpuCoalesceIterator put them into device| -|[#1868](https://github.com/NVIDIA/spark-rapids/pull/1868)|Increase row limit when doing count() for HostColumnarToGpu | -|[#1855](https://github.com/NVIDIA/spark-rapids/pull/1855)|Expose row count statistics in GpuShuffleExchangeExec| -|[#1875](https://github.com/NVIDIA/spark-rapids/pull/1875)|Fix merge conflict with branch-0.4| -|[#1841](https://github.com/NVIDIA/spark-rapids/pull/1841)|Add in support for DateAddInterval| -|[#1869](https://github.com/NVIDIA/spark-rapids/pull/1869)|Fix tests for Spark 3.2.0 shim| -|[#1858](https://github.com/NVIDIA/spark-rapids/pull/1858)|fix shuffle manager doc on ucx library path| -|[#1836](https://github.com/NVIDIA/spark-rapids/pull/1836)|Add shim for Spark 3.1.2| -|[#1852](https://github.com/NVIDIA/spark-rapids/pull/1852)|Fix Part Suite Tests| -|[#1616](https://github.com/NVIDIA/spark-rapids/pull/1616)|Cost-based optimizer| -|[#1834](https://github.com/NVIDIA/spark-rapids/pull/1834)|Add shim for Spark 3.0.3| -|[#1839](https://github.com/NVIDIA/spark-rapids/pull/1839)|Refactor join code to reduce duplicated code| -|[#1848](https://github.com/NVIDIA/spark-rapids/pull/1848)|Fix merge conflict with branch-0.4| -|[#1796](https://github.com/NVIDIA/spark-rapids/pull/1796)|Have most of range partitioning run on the GPU| -|[#1845](https://github.com/NVIDIA/spark-rapids/pull/1845)|Fix fails on the mortgage ETL test| -|[#1829](https://github.com/NVIDIA/spark-rapids/pull/1829)|Cleanup unused Jenkins files and scripts| -|[#1704](https://github.com/NVIDIA/spark-rapids/pull/1704)|Create a shim for Spark 3.2.0 development| -|[#1838](https://github.com/NVIDIA/spark-rapids/pull/1838)|Make databricks build.sh more convenient for dev| -|[#1835](https://github.com/NVIDIA/spark-rapids/pull/1835)|Fix merge conflict with branch-0.4| -|[#1808](https://github.com/NVIDIA/spark-rapids/pull/1808)|Update mortgage tests to support reading multiple dataset formats| 
-|[#1822](https://github.com/NVIDIA/spark-rapids/pull/1822)|Fix conflict 0.4 to 0.5|
-|[#1807](https://github.com/NVIDIA/spark-rapids/pull/1807)|Fix merge conflict between branch-0.4 and branch-0.5|
-|[#1788](https://github.com/NVIDIA/spark-rapids/pull/1788)|Spill metrics everywhere|
-|[#1719](https://github.com/NVIDIA/spark-rapids/pull/1719)|Add in out of core sort|
-|[#1728](https://github.com/NVIDIA/spark-rapids/pull/1728)|Skip RAPIDS accelerated Java UDF tests if UDF fails to load|
-|[#1689](https://github.com/NVIDIA/spark-rapids/pull/1689)|Update docs for plugin 0.5.0-SNAPSHOT and cudf 0.19-SNAPSHOT|
-|[#1682](https://github.com/NVIDIA/spark-rapids/pull/1682)|init CI/CD dependencies branch-0.5|
-
-## Release 0.4.1
-
-### Bugs Fixed
-|||
-|:---|:---|
-|[#1985](https://github.com/NVIDIA/spark-rapids/issues/1985)|[BUG] broadcast exchange can fail on 0.4|
-
-### PRs
-|||
-|:---|:---|
-|[#1995](https://github.com/NVIDIA/spark-rapids/pull/1995)|update changelog 0.4.1 [skip ci]|
-|[#1990](https://github.com/NVIDIA/spark-rapids/pull/1990)|Prepare for v0.4.1 release|
-|[#1988](https://github.com/NVIDIA/spark-rapids/pull/1988)|broadcast exchange can fail when job group set|
-
-## Release 0.4
-
-### Features
-|||
-|:---|:---|
-|[#1773](https://github.com/NVIDIA/spark-rapids/issues/1773)|[FEA] Spark 3.0.2 release support|
-|[#80](https://github.com/NVIDIA/spark-rapids/issues/80)|[FEA] Support the struct SQL function|
-|[#76](https://github.com/NVIDIA/spark-rapids/issues/76)|[FEA] Support CreateArray|
-|[#1635](https://github.com/NVIDIA/spark-rapids/issues/1635)|[FEA] RAPIDS accelerated Java UDF|
-|[#1333](https://github.com/NVIDIA/spark-rapids/issues/1333)|[FEA] Support window operations on Decimal|
-|[#1419](https://github.com/NVIDIA/spark-rapids/issues/1419)|[FEA] Support GPU accelerated UDF alternative for higher order function "aggregate" over window|
-|[#1580](https://github.com/NVIDIA/spark-rapids/issues/1580)|[FEA] Support Decimal for ParquetCachedBatchSerializer|
-|[#1600](https://github.com/NVIDIA/spark-rapids/issues/1600)|[FEA] Support ScalarSubquery|
-|[#1072](https://github.com/NVIDIA/spark-rapids/issues/1072)|[FEA] Support for a custom DataSource V2 which supplies Arrow data|
-|[#906](https://github.com/NVIDIA/spark-rapids/issues/906)|[FEA] Clarify query explanation to directly state what will run on GPU|
-|[#1335](https://github.com/NVIDIA/spark-rapids/issues/1335)|[FEA] Support CollectLimitExec for decimal|
-|[#1485](https://github.com/NVIDIA/spark-rapids/issues/1485)|[FEA] Decimal Support for Parquet Write|
-|[#1329](https://github.com/NVIDIA/spark-rapids/issues/1329)|[FEA] Decimal support for multiply int div, add, subtract and null safe equals|
-|[#1351](https://github.com/NVIDIA/spark-rapids/issues/1351)|[FEA] Execute UDFs that provide a RAPIDS execution path|
-|[#1330](https://github.com/NVIDIA/spark-rapids/issues/1330)|[FEA] Support Decimal Casts|
-|[#1353](https://github.com/NVIDIA/spark-rapids/issues/1353)|[FEA] Example of RAPIDS UDF using custom GPU code|
-|[#1487](https://github.com/NVIDIA/spark-rapids/issues/1487)|[FEA] Change spark 3.1.0 to 3.1.1|
-|[#1334](https://github.com/NVIDIA/spark-rapids/issues/1334)|[FEA] Add support for count aggregate on decimal|
-|[#1325](https://github.com/NVIDIA/spark-rapids/issues/1325)|[FEA] Add in join support for decimal|
-|[#1326](https://github.com/NVIDIA/spark-rapids/issues/1326)|[FEA] Add in Broadcast support for decimal values|
-|[#37](https://github.com/NVIDIA/spark-rapids/issues/37)|[FEA] round and bround SQL functions|
-|[#78](https://github.com/NVIDIA/spark-rapids/issues/78)|[FEA] Support CreateNamedStruct function|
-|[#1331](https://github.com/NVIDIA/spark-rapids/issues/1331)|[FEA] UnionExec and ExpandExec support for decimal|
-|[#1332](https://github.com/NVIDIA/spark-rapids/issues/1332)|[FEA] Support CaseWhen, Coalesce and IfElse for decimal|
-|[#937](https://github.com/NVIDIA/spark-rapids/issues/937)|[FEA] have murmur3 hash function that matches exactly with spark|
-|[#1324](https://github.com/NVIDIA/spark-rapids/issues/1324)|[FEA] Support Parquet Read of Decimal FIXED_LENGTH_BYTE_ARRAY|
-|[#1428](https://github.com/NVIDIA/spark-rapids/issues/1428)|[FEA] Add support for unary decimal operations abs, floor, ceil, unary - and unary +|
-|[#1375](https://github.com/NVIDIA/spark-rapids/issues/1375)|[FEA] Add log statement for what the concurrentGpuTasks tasks is set to on executor startup|
-|[#1352](https://github.com/NVIDIA/spark-rapids/issues/1352)|[FEA] Example of RAPIDS UDF using cudf Java APIs|
-|[#1328](https://github.com/NVIDIA/spark-rapids/issues/1328)|[FEA] Support sorting and shuffle of decimal|
-|[#1316](https://github.com/NVIDIA/spark-rapids/issues/1316)|[FEA] Support simple DECIMAL aggregates|
-
-### Performance
-|||
-|:---|:---|
-|[#1435](https://github.com/NVIDIA/spark-rapids/issues/1435)|[FEA]Improve the file reading by using local file caching|
-|[#1738](https://github.com/NVIDIA/spark-rapids/issues/1738)|[FEA] Reduce regex usage in CAST string to date/timestamp|
-|[#987](https://github.com/NVIDIA/spark-rapids/issues/987)|[FEA] Optimize CAST from string to temporal types by using cuDF is_timestamp function|
-|[#1594](https://github.com/NVIDIA/spark-rapids/issues/1594)|[FEA] RAPIDS accelerated ScalaUDF|
-|[#103](https://github.com/NVIDIA/spark-rapids/issues/103)|[FEA] GPU version of TakeOrderedAndProject|
-|[#1024](https://github.com/NVIDIA/spark-rapids/issues/1024)|Cleanup RAPIDS transport calls to `receive`|
-|[#1366](https://github.com/NVIDIA/spark-rapids/issues/1366)|Seeing performance differences of multi-threaded/coalesce/perfile Parquet reader type for a single file|
-|[#1200](https://github.com/NVIDIA/spark-rapids/issues/1200)|[FEA] Accelerate the scan speed for coalescing parquet reader when reading files from multiple partitioned folders|
-
-### Bugs Fixed
-|||
-|:---|:---|
-|[#1885](https://github.com/NVIDIA/spark-rapids/issues/1885)|[BUG] natural join on string key results in a data frame with spurious NULLs|
-|[#1785](https://github.com/NVIDIA/spark-rapids/issues/1785)|[BUG] Rapids pytest integration tests FAILED on Yarn cluster with unrecognized arguments: `--std_input_path=src/test/resources/`|
-|[#999](https://github.com/NVIDIA/spark-rapids/issues/999)|[BUG] test_multi_types_window_aggs_for_rows_lead_lag fails against Spark 3.1.0|
-|[#1818](https://github.com/NVIDIA/spark-rapids/issues/1818)|[BUG] unmoored doc comment warnings in GpuCast|
-|[#1817](https://github.com/NVIDIA/spark-rapids/issues/1817)|[BUG] Developer build with local modifications fails during verify phase|
-|[#1644](https://github.com/NVIDIA/spark-rapids/issues/1644)|[BUG] test_window_aggregate_udf_array_from_python fails on databricks|
-|[#1771](https://github.com/NVIDIA/spark-rapids/issues/1771)|[BUG] Databricks AWS CI/CD failing to create cluster|
-|[#1157](https://github.com/NVIDIA/spark-rapids/issues/1157)|[BUG] Fix regression supporting to_date on GPU with Spark 3.1.0|
-|[#716](https://github.com/NVIDIA/spark-rapids/issues/716)|[BUG] Cast String to TimeStamp issues|
-|[#1117](https://github.com/NVIDIA/spark-rapids/issues/1117)|[BUG] CAST string to date returns wrong values for dates with out-of-range values|
-|[#1670](https://github.com/NVIDIA/spark-rapids/issues/1670)|[BUG] Some TPC-DS queries fail with AQE when decimal types enabled|
-|[#1730](https://github.com/NVIDIA/spark-rapids/issues/1730)|[BUG] Range Partitioning can crash when processing is in the order-by|
-|[#1726](https://github.com/NVIDIA/spark-rapids/issues/1726)|[BUG] java url decode test failing on databricks, emr, and dataproc|
-|[#1651](https://github.com/NVIDIA/spark-rapids/issues/1651)|[BUG] GDS exception when writing shuffle file|
-|[#1702](https://github.com/NVIDIA/spark-rapids/issues/1702)|[BUG] check all tests marked xfail for Spark 3.1.1|
-|[#575](https://github.com/NVIDIA/spark-rapids/issues/575)|[BUG] Spark 3.1 FAILED join_test.py::test_broadcast_join_mixed[FullOuter][IGNORE_ORDER] failed|
-|[#577](https://github.com/NVIDIA/spark-rapids/issues/577)|[BUG] Spark 3.1 log arithmetic functions fail|
-|[#1541](https://github.com/NVIDIA/spark-rapids/issues/1541)|[BUG] Tests fail in integration in distributed mode after allowing nested types through in sort and shuffle|
-|[#1626](https://github.com/NVIDIA/spark-rapids/issues/1626)|[BUG] TPC-DS-like query 77 at scale=3TB fails with maxResultSize exceeded error|
-|[#1576](https://github.com/NVIDIA/spark-rapids/issues/1576)|[BUG] loading SPARK-32639 example parquet file triggers a JVM crash |
-|[#1643](https://github.com/NVIDIA/spark-rapids/issues/1643)|[BUG] TPC-DS-Like q10, q35, and q69 - slow or hanging at leftSemiJoin|
-|[#1650](https://github.com/NVIDIA/spark-rapids/issues/1650)|[BUG] BenchmarkRunner does not include query name in JSON summary filename when running multiple queries|
-|[#1654](https://github.com/NVIDIA/spark-rapids/issues/1654)|[BUG] TPC-DS-like query 59 at scale=3TB with AQE fails with join mismatch|
-|[#1274](https://github.com/NVIDIA/spark-rapids/issues/1274)|[BUG] OutOfMemoryError - Maximum pool size exceeded while running 24 day criteo ETL Transform stage|
-|[#1497](https://github.com/NVIDIA/spark-rapids/issues/1497)|[BUG] Spark-rapids v0.3.0 pytest integration tests with UCX on FAILED on Yarn cluster|
-|[#1534](https://github.com/NVIDIA/spark-rapids/issues/1534)|[BUG] Spark 3.1.1 test failure in writing due to removal of InMemoryFileIndex.shouldFilterOut|
-|[#1155](https://github.com/NVIDIA/spark-rapids/issues/1155)|[BUG] on shutdown don't print `Socket closed` exception when shutting down UCX.scala|
-|[#1510](https://github.com/NVIDIA/spark-rapids/issues/1510)|[BUG] IllegalArgumentException during shuffle|
-|[#1513](https://github.com/NVIDIA/spark-rapids/issues/1513)|[BUG] executor not fully initialized may get calls from Spark, in the process setting the `catalog` incorrectly|
-|[#1466](https://github.com/NVIDIA/spark-rapids/issues/1466)|[BUG] Databricks build must run before the rapids nightly|
-|[#1456](https://github.com/NVIDIA/spark-rapids/issues/1456)|[BUG] Databricks 0.4 parquet integration tests fail|
-|[#1400](https://github.com/NVIDIA/spark-rapids/issues/1400)|[BUG] Regressions in spark-shell usage of benchmark utilities|
-|[#1119](https://github.com/NVIDIA/spark-rapids/issues/1119)|[BUG] inner join fails with Column size cannot be negative|
-|[#1079](https://github.com/NVIDIA/spark-rapids/issues/1079)|[BUG]The Scala UDF function cannot invoke the UDF compiler when it's passed to "explode"|
-|[#1298](https://github.com/NVIDIA/spark-rapids/issues/1298)|TPCxBB query16 failed at UnsupportedOperationException: org.apache.parquet.column.values.dictionary.PlainValuesDictionary$PlainIntegerDictionary|
-|[#1271](https://github.com/NVIDIA/spark-rapids/issues/1271)|[BUG] CastOpSuite and AnsiCastOpSuite failing with ArithmeticException on Spark 3.1|
-|[#84](https://github.com/NVIDIA/spark-rapids/issues/84)|[BUG] sort does not match spark for -0.0 and 0.0|
-|[#578](https://github.com/NVIDIA/spark-rapids/issues/578)|[BUG] Spark 3.1 qa_nightly_select_test.py Full join test failures|
-|[#586](https://github.com/NVIDIA/spark-rapids/issues/586)|[BUG] Spark3.1 tpch failures|
-|[#837](https://github.com/NVIDIA/spark-rapids/issues/837)|[BUG] Distinct count of floating point values differs with regular spark|
-|[#953](https://github.com/NVIDIA/spark-rapids/issues/953)|[BUG] 3.1.0 pos_explode tests are failing|
-|[#127](https://github.com/NVIDIA/spark-rapids/issues/127)|[BUG] String CSV parsing does not respect nullValues|
-|[#1203](https://github.com/NVIDIA/spark-rapids/issues/1203)|[BUG] tpcds query 51 fails with join error on Spark 3.1.0|
-|[#750](https://github.com/NVIDIA/spark-rapids/issues/750)|[BUG] udf_cudf_test::test_with_column fails with IPC error |
-|[#1348](https://github.com/NVIDIA/spark-rapids/issues/1348)|[BUG] Host columnar decimal conversions are failing|
-|[#1270](https://github.com/NVIDIA/spark-rapids/issues/1270)|[BUG] Benchmark runner fails to produce report if benchmark fails due to an invalid query plan|
-|[#1179](https://github.com/NVIDIA/spark-rapids/issues/1179)|[BUG] SerializeConcatHostBuffersDeserializeBatch may have thread issues|
-|[#1115](https://github.com/NVIDIA/spark-rapids/issues/1115)|[BUG] Unchecked type warning in SparkQueryCompareTestSuite|
-
-### PRs
-|||
-|:---|:---|
-|[#1963](https://github.com/NVIDIA/spark-rapids/pull/1963)|Update changelog 0.4 [skip ci]|
-|[#1960](https://github.com/NVIDIA/spark-rapids/pull/1960)|Replace sonatype staging link with maven central link|
-|[#1945](https://github.com/NVIDIA/spark-rapids/pull/1945)|Update changelog 0.4 [skip ci]|
-|[#1910](https://github.com/NVIDIA/spark-rapids/pull/1910)|Make hash partitioning match CPU|
-|[#1927](https://github.com/NVIDIA/spark-rapids/pull/1927)|Change cuDF dependency to 0.18.1|
-|[#1934](https://github.com/NVIDIA/spark-rapids/pull/1934)|Update documentation to use cudf version 0.18.1|
-|[#1871](https://github.com/NVIDIA/spark-rapids/pull/1871)|Disable coalesce batch spilling to avoid cudf contiguous_split bug|
-|[#1849](https://github.com/NVIDIA/spark-rapids/pull/1849)|Update changelog for 0.4|
-|[#1744](https://github.com/NVIDIA/spark-rapids/pull/1744)|Fix NullPointerException on null partition insert|
-|[#1842](https://github.com/NVIDIA/spark-rapids/pull/1842)|Update to note support for 3.0.2|
-|[#1832](https://github.com/NVIDIA/spark-rapids/pull/1832)|Spark 3.1.1 shim no longer a snapshot shim|
-|[#1831](https://github.com/NVIDIA/spark-rapids/pull/1831)|Spark 3.0.2 shim no longer a snapshot shim|
-|[#1826](https://github.com/NVIDIA/spark-rapids/pull/1826)|Remove benchmarks|
-|[#1828](https://github.com/NVIDIA/spark-rapids/pull/1828)|Update cudf dependency to 0.18|
-|[#1813](https://github.com/NVIDIA/spark-rapids/pull/1813)|Fix LEAD/LAG failures in Spark 3.1.1|
-|[#1819](https://github.com/NVIDIA/spark-rapids/pull/1819)|Fix scaladoc warning in GpuCast|
-|[#1820](https://github.com/NVIDIA/spark-rapids/pull/1820)|[BUG] make modified check pre-merge only|
-|[#1780](https://github.com/NVIDIA/spark-rapids/pull/1780)|Remove SNAPSHOT from test and integration_test READMEs|
-|[#1809](https://github.com/NVIDIA/spark-rapids/pull/1809)|check if modified files after update_config/supported|
-|[#1804](https://github.com/NVIDIA/spark-rapids/pull/1804)|Update UCX documentation for RX_QUEUE_LEN and Docker|
-|[#1810](https://github.com/NVIDIA/spark-rapids/pull/1810)|Pandas UDF: Sort the data before computing the sum.|
-|[#1751](https://github.com/NVIDIA/spark-rapids/pull/1751)|Exclude foldable expressions from GPU if constant folding is disabled|
-|[#1798](https://github.com/NVIDIA/spark-rapids/pull/1798)|Add documentation about explain not on GPU when AQE is on|
-|[#1766](https://github.com/NVIDIA/spark-rapids/pull/1766)|Branch 0.4 release docs|
-|[#1794](https://github.com/NVIDIA/spark-rapids/pull/1794)|Build python output schema from udf expressions|
-|[#1783](https://github.com/NVIDIA/spark-rapids/pull/1783)|Fix the collect_list over window tests failures on db|
-|[#1781](https://github.com/NVIDIA/spark-rapids/pull/1781)|Better float/double cases for casting tests|
-|[#1790](https://github.com/NVIDIA/spark-rapids/pull/1790)|Record row counts in benchmark runs that call collect|
-|[#1779](https://github.com/NVIDIA/spark-rapids/pull/1779)|Add support of DateType and TimestampType for GetTimestamp expression|
-|[#1768](https://github.com/NVIDIA/spark-rapids/pull/1768)|Updating getting started Databricks docs|
-|[#1742](https://github.com/NVIDIA/spark-rapids/pull/1742)|Fix regression supporting to_date with Spark-3.1|
-|[#1775](https://github.com/NVIDIA/spark-rapids/pull/1775)|Fix ambiguous ordering for some tests|
-|[#1760](https://github.com/NVIDIA/spark-rapids/pull/1760)|Update GpuDataSourceScanExec and GpuBroadcastExchangeExec to fix audit issues|
-|[#1750](https://github.com/NVIDIA/spark-rapids/pull/1750)|Detect task failures in benchmarks|
-|[#1767](https://github.com/NVIDIA/spark-rapids/pull/1767)|Consistent Spark version for test and production|
-|[#1741](https://github.com/NVIDIA/spark-rapids/pull/1741)|Reduce regex use in CAST|
-|[#1756](https://github.com/NVIDIA/spark-rapids/pull/1756)|Skip RAPIDS accelerated Java UDF tests if UDF fails to load|
-|[#1716](https://github.com/NVIDIA/spark-rapids/pull/1716)|Update RapidsShuffleManager documentation for branch 0.4|
-|[#1740](https://github.com/NVIDIA/spark-rapids/pull/1740)|Disable ORC writes until bug can be fixed|
-|[#1747](https://github.com/NVIDIA/spark-rapids/pull/1747)|Fix resource leaks in unit tests|
-|[#1725](https://github.com/NVIDIA/spark-rapids/pull/1725)|Branch 0.4 FAQ reorg|
-|[#1718](https://github.com/NVIDIA/spark-rapids/pull/1718)|CAST string to temporal type now calls isTimestamp|
-|[#1734](https://github.com/NVIDIA/spark-rapids/pull/1734)|Disable range partitioning if computation is needed|
-|[#1723](https://github.com/NVIDIA/spark-rapids/pull/1723)|Removed StructTypes support for ParquetCachedBatchSerializer as cudf doesn't support it yet|
-|[#1714](https://github.com/NVIDIA/spark-rapids/pull/1714)|Add support for RAPIDS accelerated Java UDFs|
-|[#1713](https://github.com/NVIDIA/spark-rapids/pull/1713)|Call GpuDeviceManager.shutdown when the executor plugin is shutting down|
-|[#1596](https://github.com/NVIDIA/spark-rapids/pull/1596)|Added in Decimal support to ParquetCachedBatchSerializer|
-|[#1706](https://github.com/NVIDIA/spark-rapids/pull/1706)|cleanup unused is_before_spark_310|
-|[#1685](https://github.com/NVIDIA/spark-rapids/pull/1685)|Fix CustomShuffleReader replacement when decimal types enabled|
-|[#1699](https://github.com/NVIDIA/spark-rapids/pull/1699)|Add docs about Spark 3.1 in standalone modes not needing extra class path|
-|[#1701](https://github.com/NVIDIA/spark-rapids/pull/1701)|remove xfail for orc test_input_meta for spark 3.1.0|
-|[#1703](https://github.com/NVIDIA/spark-rapids/pull/1703)|Remove xfail for spark 3.1.0 test_broadcast_join_mixed FullOuter|
-|[#1676](https://github.com/NVIDIA/spark-rapids/pull/1676)|BenchmarkRunner option to generate query plan diagrams in DOT format|
-|[#1695](https://github.com/NVIDIA/spark-rapids/pull/1695)|support alternate jar paths|
-|[#1694](https://github.com/NVIDIA/spark-rapids/pull/1694)|increase mem and limit parallelism for pre-merge|
-|[#1691](https://github.com/NVIDIA/spark-rapids/pull/1691)|add validate_execs_in_gpu_plan to pytest.ini|
-|[#1692](https://github.com/NVIDIA/spark-rapids/pull/1692)|Add the integration test resources to the test tarball|
-|[#1677](https://github.com/NVIDIA/spark-rapids/pull/1677)|When PTDS is enabled, print warning if the allocator is not ARENA|
-|[#1683](https://github.com/NVIDIA/spark-rapids/pull/1683)|update changelog to verify autotmerge 0.5 setup [skip ci]|
-|[#1673](https://github.com/NVIDIA/spark-rapids/pull/1673)|support auto-merge for branch 0.5 [skip ci]|
-|[#1681](https://github.com/NVIDIA/spark-rapids/pull/1681)|Xfail the collect_list tests for databricks|
-|[#1678](https://github.com/NVIDIA/spark-rapids/pull/1678)|Fix array/struct checks in Sort and HashAggregate and sorting tests in distributed mode|
-|[#1671](https://github.com/NVIDIA/spark-rapids/pull/1671)|Allow metrics to be configurable by level|
-|[#1675](https://github.com/NVIDIA/spark-rapids/pull/1675)|add run_pyspark_from_build.sh to the pytest distribution tarball|
-|[#1548](https://github.com/NVIDIA/spark-rapids/pull/1548)|Support executing collect_list on GPU with windowing.|
-|[#1593](https://github.com/NVIDIA/spark-rapids/pull/1593)|Avoid unnecessary Table instances after contiguous split|
-|[#1592](https://github.com/NVIDIA/spark-rapids/pull/1592)|Add in support for Decimal divide|
-|[#1668](https://github.com/NVIDIA/spark-rapids/pull/1668)|Implement way for python integration tests to validate Exec is in GPU plan|
-|[#1669](https://github.com/NVIDIA/spark-rapids/pull/1669)|Add FAQ entries for executor-per-GPU questions|
-|[#1661](https://github.com/NVIDIA/spark-rapids/pull/1661)|Enable Parquet test for file containing map struct key|
-|[#1664](https://github.com/NVIDIA/spark-rapids/pull/1664)|Filter nulls for left semi and left anti join to work around cudf|
-|[#1665](https://github.com/NVIDIA/spark-rapids/pull/1665)|Add better automated tests for Arrow columnar copy in HostColumnarToGpu|
-|[#1614](https://github.com/NVIDIA/spark-rapids/pull/1614)|add alluxio getting start document|
-|[#1639](https://github.com/NVIDIA/spark-rapids/pull/1639)|support GpuScalarSubquery|
-|[#1656](https://github.com/NVIDIA/spark-rapids/pull/1656)|Move UDF to Catalyst Expressions to its own document|
-|[#1663](https://github.com/NVIDIA/spark-rapids/pull/1663)|BenchmarkRunner - Include query name in JSON summary filename|
-|[#1655](https://github.com/NVIDIA/spark-rapids/pull/1655)|Fix extraneous shuffles added by AQE|
-|[#1652](https://github.com/NVIDIA/spark-rapids/pull/1652)|Fix typo in arrow optimized config name - spark.rapids.arrowCopyOptimizationEnabled|
-|[#1645](https://github.com/NVIDIA/spark-rapids/pull/1645)|Run Databricks IT with python-xdist parallel, includes test fixes and xfail|
-|[#1649](https://github.com/NVIDIA/spark-rapids/pull/1649)|Move building from source docs to contributing guide|
-|[#1637](https://github.com/NVIDIA/spark-rapids/pull/1637)|Fail DivModLike on zero divisor in ANSI mode|
-|[#1646](https://github.com/NVIDIA/spark-rapids/pull/1646)|Update links in rapids-udfs.md after moving to subfolder|
-|[#1641](https://github.com/NVIDIA/spark-rapids/pull/1641)|Xfail struct and array order by tests on Dataproc|
-|[#1565](https://github.com/NVIDIA/spark-rapids/pull/1565)|Add GPU accelerated array_contains operator|
-|[#1617](https://github.com/NVIDIA/spark-rapids/pull/1617)|Enable nightly test checks for Apache Spark|
-|[#1636](https://github.com/NVIDIA/spark-rapids/pull/1636)|RAPIDS accelerated Spark Scala UDF support|
-|[#1634](https://github.com/NVIDIA/spark-rapids/pull/1634)|Fix databricks build since Arrow code added|
-|[#1599](https://github.com/NVIDIA/spark-rapids/pull/1599)|Add division by zero tests for Spark 3.1 behavior|
-|[#1619](https://github.com/NVIDIA/spark-rapids/pull/1619)|Update GpuFileSourceScanExec to be in sync with DataSourceScanExec|
-|[#1631](https://github.com/NVIDIA/spark-rapids/pull/1631)|Explicitly add maven-jar-plugin version to improve incremental build time.|
-|[#1624](https://github.com/NVIDIA/spark-rapids/pull/1624)|Update explain format to show what will and will not run on the GPU|
-|[#1622](https://github.com/NVIDIA/spark-rapids/pull/1622)|Support faster copy for a custom DataSource V2 which supplies Arrow data|
-|[#1621](https://github.com/NVIDIA/spark-rapids/pull/1621)|Additional functionality docs|
-|[#1618](https://github.com/NVIDIA/spark-rapids/pull/1618)|update blossom-ci for security updates [skip ci]|
-|[#1562](https://github.com/NVIDIA/spark-rapids/pull/1562)|add alluxio support|
-|[#1597](https://github.com/NVIDIA/spark-rapids/pull/1597)|Documentation for Parquet serializer|
-|[#1611](https://github.com/NVIDIA/spark-rapids/pull/1611)|Add in flag for integration tests to not skip required tests|
-|[#1609](https://github.com/NVIDIA/spark-rapids/pull/1609)|Disable float round/bround by default|
-|[#1615](https://github.com/NVIDIA/spark-rapids/pull/1615)|Add in window support for average|
-|[#1610](https://github.com/NVIDIA/spark-rapids/pull/1610)|Limit length of spark app name in BenchmarkRunner|
-|[#1579](https://github.com/NVIDIA/spark-rapids/pull/1579)|Support TakeOrderedAndProject|
-|[#1581](https://github.com/NVIDIA/spark-rapids/pull/1581)|Support Decimal type for CollectLimitExec|
-|[#1591](https://github.com/NVIDIA/spark-rapids/pull/1591)|Add support for running multiple queries in BenchmarkRunner|
-|[#1595](https://github.com/NVIDIA/spark-rapids/pull/1595)|Fix Github documentation issue template|
-|[#1577](https://github.com/NVIDIA/spark-rapids/pull/1577)|rename directory from spark310 to spark311|
-|[#1578](https://github.com/NVIDIA/spark-rapids/pull/1578)|Test to track RAPIDS-side issues re SPARK-32639|
-|[#1583](https://github.com/NVIDIA/spark-rapids/pull/1583)|fix request-action issue [skip ci]|
-|[#1555](https://github.com/NVIDIA/spark-rapids/pull/1555)|Enable ANSI mode for CAST string to timestamp|
-|[#1531](https://github.com/NVIDIA/spark-rapids/pull/1531)|Decimal Support for writing Parquet|
-|[#1545](https://github.com/NVIDIA/spark-rapids/pull/1545)|Support comparing ORC data|
-|[#1570](https://github.com/NVIDIA/spark-rapids/pull/1570)|Branch 0.4 doc cleanup|
-|[#1569](https://github.com/NVIDIA/spark-rapids/pull/1569)|Add shim method shouldIgnorePath|
-|[#1564](https://github.com/NVIDIA/spark-rapids/pull/1564)|Add in support for Decimal Multiply and DIV|
-|[#1561](https://github.com/NVIDIA/spark-rapids/pull/1561)|Decimal support for add and subtract|
-|[#1560](https://github.com/NVIDIA/spark-rapids/pull/1560)|support sum in window aggregation for decimal|
-|[#1546](https://github.com/NVIDIA/spark-rapids/pull/1546)|Cleanup shutdown logging for UCX shuffle|
-|[#1551](https://github.com/NVIDIA/spark-rapids/pull/1551)|RAPIDS-accelerated Hive UDFs support all types|
-|[#1543](https://github.com/NVIDIA/spark-rapids/pull/1543)|Shuffle/transport enabled by default|
-|[#1552](https://github.com/NVIDIA/spark-rapids/pull/1552)|Disable blackduck signature check|
-|[#1540](https://github.com/NVIDIA/spark-rapids/pull/1540)|Handle ShuffleManager api calls when plugin is not fully initialized|
-|[#1547](https://github.com/NVIDIA/spark-rapids/pull/1547)|Cleanup shuffle transport receive calls|
-|[#1512](https://github.com/NVIDIA/spark-rapids/pull/1512)|Support window operations on Decimal|
-|[#1532](https://github.com/NVIDIA/spark-rapids/pull/1532)|Support casting from decimal to decimal|
-|[#1542](https://github.com/NVIDIA/spark-rapids/pull/1542)|Change the number of partitions to zero when a range is empty|
-|[#1506](https://github.com/NVIDIA/spark-rapids/pull/1506)|Add --use-decimals flag to TPC-DS ConvertFiles|
-|[#1511](https://github.com/NVIDIA/spark-rapids/pull/1511)|Remove unused Jenkinsfiles [skip ci]|
-|[#1505](https://github.com/NVIDIA/spark-rapids/pull/1505)|Add least, greatest and eqNullSafe support for DecimalType|
-|[#1484](https://github.com/NVIDIA/spark-rapids/pull/1484)|add doc for nsight systems bundled with cuda toolkit|
-|[#1478](https://github.com/NVIDIA/spark-rapids/pull/1478)|Documentation for RAPIDS-accelerated Hive UDFs|
-|[#1477](https://github.com/NVIDIA/spark-rapids/pull/1477)|Allow structs and arrays to pass through for Shuffle and Sort |
-|[#1489](https://github.com/NVIDIA/spark-rapids/pull/1489)|Adds in some support for the array sql function|
-|[#1438](https://github.com/NVIDIA/spark-rapids/pull/1438)|Cast from numeric types to decimal type|
-|[#1493](https://github.com/NVIDIA/spark-rapids/pull/1493)|Moved ParquetRecordMaterializer to the shim package to follow convention|
-|[#1495](https://github.com/NVIDIA/spark-rapids/pull/1495)|Fix merge conflict, merge branch 0.3 to branch 0.4 [skip ci]|
-|[#1472](https://github.com/NVIDIA/spark-rapids/pull/1472)|Add an example RAPIDS-accelerated Hive UDF using native code|
-|[#1488](https://github.com/NVIDIA/spark-rapids/pull/1488)|Rename Spark 3.1.0 shim to Spark 3.1.1 to match community|
-|[#1474](https://github.com/NVIDIA/spark-rapids/pull/1474)|Fix link|
-|[#1476](https://github.com/NVIDIA/spark-rapids/pull/1476)|DecimalType support for Aggregate Count|
-|[#1475](https://github.com/NVIDIA/spark-rapids/pull/1475)| Join support for DecimalType|
-|[#1244](https://github.com/NVIDIA/spark-rapids/pull/1244)|Support round and bround SQL functions |
-|[#1458](https://github.com/NVIDIA/spark-rapids/pull/1458)|Add in support for struct and named_struct|
-|[#1465](https://github.com/NVIDIA/spark-rapids/pull/1465)|DecimalType support for UnionExec and ExpandExec|
-|[#1450](https://github.com/NVIDIA/spark-rapids/pull/1450)|Add dynamic configs for the spark-rapids IT pipelines|
-|[#1207](https://github.com/NVIDIA/spark-rapids/pull/1207)|Spark SQL hash function using murmur3|
-|[#1457](https://github.com/NVIDIA/spark-rapids/pull/1457)|Support reading decimal columns from parquet files on Databricks|
-|[#1455](https://github.com/NVIDIA/spark-rapids/pull/1455)|Upgrade Scala Maven Plugin to 4.3.0|
-|[#1453](https://github.com/NVIDIA/spark-rapids/pull/1453)|DecimalType support for IfElse and Coalesce|
-|[#1452](https://github.com/NVIDIA/spark-rapids/pull/1452)|Support DecimalType for CaseWhen|
-|[#1444](https://github.com/NVIDIA/spark-rapids/pull/1444)|Improve UX when running benchmarks from Spark shell|
-|[#1294](https://github.com/NVIDIA/spark-rapids/pull/1294)|Support reading decimal columns from parquet files|
-|[#1153](https://github.com/NVIDIA/spark-rapids/pull/1153)|Scala UDF will compile children expressions in Project|
-|[#1416](https://github.com/NVIDIA/spark-rapids/pull/1416)|Optimize mvn dependency download scripts|
-|[#1430](https://github.com/NVIDIA/spark-rapids/pull/1430)|Add project for testing code that requires Spark 3.1.0 or later|
-|[#1425](https://github.com/NVIDIA/spark-rapids/pull/1425)|Add in Decimal support for abs, floor, ceil, unary - and unary +|
-|[#1427](https://github.com/NVIDIA/spark-rapids/pull/1427)|Revert "Make the multi-threaded parquet reader the default"|
-|[#1420](https://github.com/NVIDIA/spark-rapids/pull/1420)|Add udf jar to nightly integration tests|
-|[#1422](https://github.com/NVIDIA/spark-rapids/pull/1422)|Log the number of concurrent gpu tasks allowed on Executor startup|
-|[#1401](https://github.com/NVIDIA/spark-rapids/pull/1401)|Accelerate the coalescing parquet reader when reading files from multiple partitioned folders|
-|[#1413](https://github.com/NVIDIA/spark-rapids/pull/1413)|Add config for cast float to integral types|
-|[#1313](https://github.com/NVIDIA/spark-rapids/pull/1313)|Support spilling to disk directly via cuFile/GDS|
-|[#1411](https://github.com/NVIDIA/spark-rapids/pull/1411)|Add udf-examples jar to databricks build|
-|[#1412](https://github.com/NVIDIA/spark-rapids/pull/1412)|Fix a lot of tests marked with xfail for Spark 3.1.0 that no longer fail|
-|[#1414](https://github.com/NVIDIA/spark-rapids/pull/1414)|Build merged code of HEAD and BASE branch for pre-merge [skip ci]|
-|[#1409](https://github.com/NVIDIA/spark-rapids/pull/1409)|Add option to use decimals in tpc-ds csv to parquet conversion|
-|[#1410](https://github.com/NVIDIA/spark-rapids/pull/1410)|Add Decimal support for In, InSet, AtLeastNNonNulls, GetArrayItem, GetStructField, and GenerateExec|
-|[#1408](https://github.com/NVIDIA/spark-rapids/pull/1408)|Support RAPIDS-accelerated HiveGenericUDF|
-|[#1407](https://github.com/NVIDIA/spark-rapids/pull/1407)|Update docs and tests for null CSV support|
-|[#1393](https://github.com/NVIDIA/spark-rapids/pull/1393)|Support RAPIDS-accelerated HiveSimpleUDF|
-|[#1392](https://github.com/NVIDIA/spark-rapids/pull/1392)|Turn on hash partitioning for decimal support|
-|[#1402](https://github.com/NVIDIA/spark-rapids/pull/1402)|Better GPU Cast type checks|
-|[#1404](https://github.com/NVIDIA/spark-rapids/pull/1404)|Fix branch 0.4 merge conflict|
-|[#1323](https://github.com/NVIDIA/spark-rapids/pull/1323)|More advanced type checking and documentation|
-|[#1391](https://github.com/NVIDIA/spark-rapids/pull/1391)|Remove extra null join filtering because cudf is fast for this now.|
-|[#1395](https://github.com/NVIDIA/spark-rapids/pull/1395)|Fix branch-0.3 -> branch-0.4 automerge|
-|[#1382](https://github.com/NVIDIA/spark-rapids/pull/1382)|Handle "MM[/-]dd" and "dd[/-]MM" datetime formats in UnixTimeExprMeta|
-|[#1390](https://github.com/NVIDIA/spark-rapids/pull/1390)|Accelerated columnar to row/row to columnar for decimal|
-|[#1380](https://github.com/NVIDIA/spark-rapids/pull/1380)|Adds in basic support for decimal sort, sum, and some shuffle|
-|[#1367](https://github.com/NVIDIA/spark-rapids/pull/1367)|Reuse gpu expression conversion rules when checking sort order|
-|[#1349](https://github.com/NVIDIA/spark-rapids/pull/1349)|Add canonicalization tests|
-|[#1368](https://github.com/NVIDIA/spark-rapids/pull/1368)|Move to cudf 0.18-SNAPSHOT|
-|[#1361](https://github.com/NVIDIA/spark-rapids/pull/1361)|Use the correct precision when reading spark columnar data.|
-|[#1273](https://github.com/NVIDIA/spark-rapids/pull/1273)|Update docs and scripts to 0.4.0-SNAPSHOT|
-|[#1321](https://github.com/NVIDIA/spark-rapids/pull/1321)|Refactor to stop inheriting from HashJoin|
-|[#1311](https://github.com/NVIDIA/spark-rapids/pull/1311)|ParquetCachedBatchSerializer code cleanup|
-|[#1303](https://github.com/NVIDIA/spark-rapids/pull/1303)|Add explicit outputOrdering for BHJ and SHJ in spark310 shim|
-|[#1299](https://github.com/NVIDIA/spark-rapids/pull/1299)|Benchmark runner improved error handling|
-
-## Release 0.3
-
-### Features
-|||
-|:---|:---|
-|[#1002](https://github.com/NVIDIA/spark-rapids/issues/1002)|[FEA] RapidsHostColumnVectorCore should verify cudf data with respect to the expected spark type |
-|[#444](https://github.com/NVIDIA/spark-rapids/issues/444)|[FEA] Plugable Cache|
-|[#1158](https://github.com/NVIDIA/spark-rapids/issues/1158)|[FEA] Better documentation on type support|
-|[#57](https://github.com/NVIDIA/spark-rapids/issues/57)|[FEA] Support INT96 for parquet reads and writes|
-|[#1003](https://github.com/NVIDIA/spark-rapids/issues/1003)|[FEA] Reduce overlap between RapidsHostColumnVector and RapidsHostColumnVectorCore|
-|[#913](https://github.com/NVIDIA/spark-rapids/issues/913)|[FEA] In Pluggable Cache Support CalendarInterval while creating CachedBatches|
-|[#1092](https://github.com/NVIDIA/spark-rapids/issues/1092)|[FEA] In Pluggable Cache handle nested types having CalendarIntervalType and NullType|
-|[#670](https://github.com/NVIDIA/spark-rapids/issues/670)|[FEA] Support NullType|
-|[#50](https://github.com/NVIDIA/spark-rapids/issues/50)|[FEA] support `spark.sql.legacy.timeParserPolicy`|
-|[#1144](https://github.com/NVIDIA/spark-rapids/issues/1144)|[FEA] Remove Databricks 3.0.0 shim layer|
-|[#1096](https://github.com/NVIDIA/spark-rapids/issues/1096)|[FEA] Implement parquet CreateDataSourceTableAsSelectCommand|
-|[#688](https://github.com/NVIDIA/spark-rapids/issues/688)|[FEA] udf compiler should be auto-appended to `spark.sql.extensions`|
-|[#502](https://github.com/NVIDIA/spark-rapids/issues/502)|[FEA] Support Databricks 7.3 LTS Runtime|
-|[#764](https://github.com/NVIDIA/spark-rapids/issues/764)|[FEA] Sanity checks for cudf jar mismatch|
-|[#1018](https://github.com/NVIDIA/spark-rapids/issues/1018)|[FEA] Log details related to GPU memory fragmentation on GPU OOM|
-|[#619](https://github.com/NVIDIA/spark-rapids/issues/619)|[FEA] log whether libcudf and libcudfjni were built for PTDS|
-|[#905](https://github.com/NVIDIA/spark-rapids/issues/905)|[FEA] create AWS EMR 3.0.1 shim|
-|[#838](https://github.com/NVIDIA/spark-rapids/issues/838)|[FEA] Support window count for a column|
-|[#864](https://github.com/NVIDIA/spark-rapids/issues/864)|[FEA] config option to enable RMM arena memory resource|
-|[#430](https://github.com/NVIDIA/spark-rapids/issues/430)|[FEA] Audit: Parquet Writer support for TIMESTAMP_MILLIS|
-|[#818](https://github.com/NVIDIA/spark-rapids/issues/818)|[FEA] Create shim layer for AWS EMR |
-|[#608](https://github.com/NVIDIA/spark-rapids/issues/608)|[FEA] Parquet small file optimization improve handle merge schema|
-
-### Performance
-|||
-|:---|:---|
-|[#446](https://github.com/NVIDIA/spark-rapids/issues/446)|[FEA] Test jucx in 1.9.x branch|
-|[#1038](https://github.com/NVIDIA/spark-rapids/issues/1038)|[FEA] Accelerate the data transfer for plan `WindowInPandasExec`|
-|[#533](https://github.com/NVIDIA/spark-rapids/issues/533)|[FEA] Improve PTDS performance|
-|[#849](https://github.com/NVIDIA/spark-rapids/issues/849)|[FEA] Have GpuColumnarBatchSerializer return GpuColumnVectorFromBuffer instances|
-|[#784](https://github.com/NVIDIA/spark-rapids/issues/784)|[FEA] Allow Host Spilling to be more dynamic|
-|[#627](https://github.com/NVIDIA/spark-rapids/issues/627)|[FEA] Further parquet reading small file improvements|
-|[#5](https://github.com/NVIDIA/spark-rapids/issues/5)|[FEA] Support Adaptive Execution|
-
-### Bugs Fixed
-|||
-|:---|:---|
-|[#1423](https://github.com/NVIDIA/spark-rapids/issues/1423)|[BUG] Mortgage ETL sample failed with spark.sql.adaptive enabled on AWS EMR 6.2 |
-|[#1369](https://github.com/NVIDIA/spark-rapids/issues/1369)|[BUG] TPC-DS Query Failing on EMR 6.2 with AQE|
-|[#1344](https://github.com/NVIDIA/spark-rapids/issues/1344)|[BUG] Spark-rapids Pytests failed on On Databricks cluster spark standalone mode|
-|[#1279](https://github.com/NVIDIA/spark-rapids/issues/1279)|[BUG] TPC-DS query 2 failing with NPE|
-|[#1280](https://github.com/NVIDIA/spark-rapids/issues/1280)|[BUG] TPC-DS query 93 failing with UnsupportedOperationException|
-|[#1308](https://github.com/NVIDIA/spark-rapids/issues/1308)|[BUG] TPC-DS query 14a runs much slower on 0.3|
-|[#1284](https://github.com/NVIDIA/spark-rapids/issues/1284)|[BUG] TPC-DS query 77 at scale=1TB fails with maxResultSize exceeded error|
-|[#1061](https://github.com/NVIDIA/spark-rapids/issues/1061)|[BUG] orc_test.py is failing|
-|[#1197](https://github.com/NVIDIA/spark-rapids/issues/1197)|[BUG] java.lang.NullPointerException when exporting delta table|
-|[#685](https://github.com/NVIDIA/spark-rapids/issues/685)|[BUG] In ParqueCachedBatchSerializer, serializing parquet buffers might blow up in certain cases|
-|[#1269](https://github.com/NVIDIA/spark-rapids/issues/1269)|[BUG] GpuSubstring is not expected to be a part of a SortOrder|
-|[#1246](https://github.com/NVIDIA/spark-rapids/issues/1246)|[BUG] Many TPC-DS benchmarks fail when writing to Parquet|
-|[#961](https://github.com/NVIDIA/spark-rapids/issues/961)|[BUG] ORC predicate pushdown should work with case-insensitive analysis|
-|[#962](https://github.com/NVIDIA/spark-rapids/issues/962)|[BUG] Loading columns from an ORC file without column names returns no data|
-|[#1245](https://github.com/NVIDIA/spark-rapids/issues/1245)|[BUG] Code adding buffers to the spillable store should synchronize|
-|[#570](https://github.com/NVIDIA/spark-rapids/issues/570)|[BUG] Continue debugging OOM after ensuring device store is empty|
-|[#972](https://github.com/NVIDIA/spark-rapids/issues/972)|[BUG] total time metric is redundant with scan time|
-|[#1039](https://github.com/NVIDIA/spark-rapids/issues/1039)|[BUG] UNBOUNDED window ranges on null timestamp columns produces incorrect results.|
-|[#1195](https://github.com/NVIDIA/spark-rapids/issues/1195)|[BUG] AcceleratedColumnarToRowIterator queue empty|
-|[#1177](https://github.com/NVIDIA/spark-rapids/issues/1177)|[BUG] leaks possible in the rapids shuffle if batches are received after the task completes|
-|[#1216](https://github.com/NVIDIA/spark-rapids/issues/1216)|[BUG] Failure to recognize ORC file format when loaded via Hive|
-|[#898](https://github.com/NVIDIA/spark-rapids/issues/898)|[BUG] count reductions are failing on databricks because lack for Complete support|
-|[#1184](https://github.com/NVIDIA/spark-rapids/issues/1184)|[BUG] test_window_aggregate_udf_array_from_python fails on databricks 3.0.1|
-|[#1151](https://github.com/NVIDIA/spark-rapids/issues/1151)|[BUG]Add databricks 3.0.1 shim layer for GpuWindowInPandasExec.|
-|[#1199](https://github.com/NVIDIA/spark-rapids/issues/1199)|[BUG] No data size in Input column in Stages page from Spark UI when using Parquet as file source|
-|[#1031](https://github.com/NVIDIA/spark-rapids/issues/1031)|[BUG] dependency info properties file contains error messages|
-|[#1149](https://github.com/NVIDIA/spark-rapids/issues/1149)|[BUG] Scaladoc warnings in GpuDataSource|
-|[#1185](https://github.com/NVIDIA/spark-rapids/issues/1185)|[BUG] test_hash_multiple_mode_query failing|
-|[#724](https://github.com/NVIDIA/spark-rapids/issues/724)|[BUG] PySpark test_broadcast_nested_loop_join_special_case intermittent failure|
-|[#1164](https://github.com/NVIDIA/spark-rapids/issues/1164)|[BUG] ansi_cast tests are failing in 3.1.0|
-|[#1110](https://github.com/NVIDIA/spark-rapids/issues/1110)|[BUG] Special date "now" has wrong value on GPU|
-|[#1139](https://github.com/NVIDIA/spark-rapids/issues/1139)|[BUG] Host columnar to GPU can be very slow|
-|[#1094](https://github.com/NVIDIA/spark-rapids/issues/1094)|[BUG] unix_timestamp on GPU returns invalid data for special dates|
-|[#1098](https://github.com/NVIDIA/spark-rapids/issues/1098)|[BUG] unix_timestamp on GPU returns invalid data for bad input|
-|[#1082](https://github.com/NVIDIA/spark-rapids/issues/1082)|[BUG] string to timestamp conversion fails with split|
-|[#1140](https://github.com/NVIDIA/spark-rapids/issues/1140)|[BUG] ConcurrentModificationException error after scala test suite completes|
-|[#1073](https://github.com/NVIDIA/spark-rapids/issues/1073)|[BUG] java.lang.RuntimeException: BinaryExpressions must override either eval or nullSafeEval|
-|[#975](https://github.com/NVIDIA/spark-rapids/issues/975)|[BUG] BroadcastExchangeExec fails to fall back to CPU on driver node on GCP Dataproc|
-|[#773](https://github.com/NVIDIA/spark-rapids/issues/773)|[BUG] Investigate high task deserialization|
-|[#1035](https://github.com/NVIDIA/spark-rapids/issues/1035)|[BUG] TPC-DS query 90 with AQE enabled fails with doExecuteBroadcast exception|
-|[#825](https://github.com/NVIDIA/spark-rapids/issues/825)|[BUG] test_window_aggs_for_ranges intermittently fails|
-|[#1008](https://github.com/NVIDIA/spark-rapids/issues/1008)|[BUG] limit function is producing inconsistent result when type is Byte, Long, Boolean and Timestamp|
-|[#996](https://github.com/NVIDIA/spark-rapids/issues/996)|[BUG] TPC-DS benchmark via spark-submit does not provide option to disable appending .dat to path|
-|[#1006](https://github.com/NVIDIA/spark-rapids/issues/1006)|[BUG] Spark3.1.0 changed BasicWriteTaskStats breaks BasicColumnarWriteTaskStatsTracker|
-|[#985](https://github.com/NVIDIA/spark-rapids/issues/985)|[BUG] missing metric `dataSize`|
-|[#881](https://github.com/NVIDIA/spark-rapids/issues/881)|[BUG] cannot disable Sort by itself|
-|[#812](https://github.com/NVIDIA/spark-rapids/issues/812)|[BUG] Test failures for 0.2 when run with multiple executors|
-|[#925](https://github.com/NVIDIA/spark-rapids/issues/925)|[BUG]Range window-functions with non-timestamp order-by expressions not falling back to CPU|
-|[#852](https://github.com/NVIDIA/spark-rapids/issues/852)|[BUG] BenchUtils.compareResults cannot compare partitioned files when ignoreOrdering=false|
-|[#868](https://github.com/NVIDIA/spark-rapids/issues/868)|[BUG] Rounding error when casting timestamp to string for timestamps before 1970|
-|[#880](https://github.com/NVIDIA/spark-rapids/issues/880)|[BUG] doing a window operation with an orderby for a single constant crashes|
-|[#776](https://github.com/NVIDIA/spark-rapids/issues/776)|[BUG] Integration test fails on spark 3.1.0-SNAPSHOT|
-|[#874](https://github.com/NVIDIA/spark-rapids/issues/874)|[BUG] `RapidsConf.scala` has some un-consistency for `spark.rapids.sql.format.parquet.multiThreadedRead`|
-|[#860](https://github.com/NVIDIA/spark-rapids/issues/860)|[BUG] we need to mark columns from received shuffle buffers as `GpuColumnVectorFromBuffer`|
-|[#122](https://github.com/NVIDIA/spark-rapids/issues/122)|[BUG] CSV Timestamp parseing is broken for TS < 1902 and TS > 2038|
-|[#810](https://github.com/NVIDIA/spark-rapids/issues/810)|[BUG] UDF Integration tests fail if pandas is not installed|
-|[#746](https://github.com/NVIDIA/spark-rapids/issues/746)|[BUG] cudf_udf_test.py is flakey|
-|[#811](https://github.com/NVIDIA/spark-rapids/issues/811)|[BUG] 0.3 nightly is timing out |
-|[#574](https://github.com/NVIDIA/spark-rapids/issues/574)|[BUG] Fix GpuTimeSub for Spark 3.1.0|
-
-### PRs
-|||
-|:---|:---|
-|[#1496](https://github.com/NVIDIA/spark-rapids/pull/1496)|Update changelog for v0.3.0 release [skip ci]|
-|[#1473](https://github.com/NVIDIA/spark-rapids/pull/1473)|Update documentation for 0.3 release|
-|[#1371](https://github.com/NVIDIA/spark-rapids/pull/1371)|Start Guide for RAPIDS on AWS EMR 6.2|
-|[#1446](https://github.com/NVIDIA/spark-rapids/pull/1446)|Update changelog for 0.3.0 release [skip ci]|
-|[#1439](https://github.com/NVIDIA/spark-rapids/pull/1439)|when AQE enabled we fail to fix up exchanges properly and EMR|
-|[#1433](https://github.com/NVIDIA/spark-rapids/pull/1433)|fix pandas 1.2 compatible issue|
-|[#1424](https://github.com/NVIDIA/spark-rapids/pull/1424)|Make the multi-threaded parquet reader the default since coalescing doesn't handle partitioned files well|
-|[#1389](https://github.com/NVIDIA/spark-rapids/pull/1389)|Update project version to 0.3.0|
-|[#1387](https://github.com/NVIDIA/spark-rapids/pull/1387)|Update cudf version to 0.17|
-|[#1370](https://github.com/NVIDIA/spark-rapids/pull/1370)|[REVIEW] init changelog 0.3 [skip ci]|
-|[#1376](https://github.com/NVIDIA/spark-rapids/pull/1376)|MetaUtils.getBatchFromMeta should return batches with GpuColumnVectorFromBuffer|
-|[#1358](https://github.com/NVIDIA/spark-rapids/pull/1358)|auto-merge: instant merge after creation [skip ci]|
-|[#1359](https://github.com/NVIDIA/spark-rapids/pull/1359)|Use SortOrder from shims.|
-|[#1343](https://github.com/NVIDIA/spark-rapids/pull/1343)|Do not run UDFs when the partition is empty.|
-|[#1342](https://github.com/NVIDIA/spark-rapids/pull/1342)|Fix and edit docs for standalone mode|
-|[#1350](https://github.com/NVIDIA/spark-rapids/pull/1350)|fix GpuRangePartitioning canonicalization|
-|[#1281](https://github.com/NVIDIA/spark-rapids/pull/1281)|Documentation added for testing|
-|[#1336](https://github.com/NVIDIA/spark-rapids/pull/1336)|Fix missing post-shuffle coalesce with AQE|
-|[#1318](https://github.com/NVIDIA/spark-rapids/pull/1318)|Fix copying GpuFileSourceScanExec node|
-|[#1337](https://github.com/NVIDIA/spark-rapids/pull/1337)|Use UTC instead of GMT|
-|[#1307](https://github.com/NVIDIA/spark-rapids/pull/1307)|Fallback to cpu when reading Delta log files for stats|
-|[#1310](https://github.com/NVIDIA/spark-rapids/pull/1310)|Fix canonicalization of GpuFileSourceScanExec, GpuShuffleCoalesceExec|
-|[#1302](https://github.com/NVIDIA/spark-rapids/pull/1302)|Add GpuSubstring handling to SortOrder canonicalization|
-|[#1265](https://github.com/NVIDIA/spark-rapids/pull/1265)|Chunking input before writing a ParquetCachedBatch|
-|[#1278](https://github.com/NVIDIA/spark-rapids/pull/1278)|Add a config to disable decimal types by default|
-|[#1272](https://github.com/NVIDIA/spark-rapids/pull/1272)|Add Alias to shims|
-|[#1268](https://github.com/NVIDIA/spark-rapids/pull/1268)|Adds in support docs for 0.3 release|
-|[#1235](https://github.com/NVIDIA/spark-rapids/pull/1235)|Trigger reading and handling control data.|
-|[#1266](https://github.com/NVIDIA/spark-rapids/pull/1266)|Updating Databricks getting started for 0.3 release|
-|[#1291](https://github.com/NVIDIA/spark-rapids/pull/1291)|Increase pre-merge resource requests [skip ci]|
-|[#1275](https://github.com/NVIDIA/spark-rapids/pull/1275)|Temporarily disable more CAST tests for Spark 3.1.0|
-|[#1264](https://github.com/NVIDIA/spark-rapids/pull/1264)|Fix race condition in batch creation|
-|[#1260](https://github.com/NVIDIA/spark-rapids/pull/1260)|Update UCX license info in NOTIFY-binary for 1.9 and RAPIDS plugin copyright dates|
-|[#1247](https://github.com/NVIDIA/spark-rapids/pull/1247)|Ensure column names are valid when writing benchmark query results to file|
-|[#1240](https://github.com/NVIDIA/spark-rapids/pull/1240)|Fix loading from ORC file with no column names|
-|[#1242](https://github.com/NVIDIA/spark-rapids/pull/1242)|Remove compatibility documentation about unsupported INT96|
-|[#1192](https://github.com/NVIDIA/spark-rapids/pull/1192)|[REVIEW] Support GpuFilter and GpuCoalesceBatches for decimal data|
-|[#1170](https://github.com/NVIDIA/spark-rapids/pull/1170)|Add nested type support to MetaUtils|
-|[#1194](https://github.com/NVIDIA/spark-rapids/pull/1194)|Drop redundant total time metric from scan|
-|[#1248](https://github.com/NVIDIA/spark-rapids/pull/1248)|At BatchedTableCompressor.finish synchronize to allow for "right-size…|
-|[#1169](https://github.com/NVIDIA/spark-rapids/pull/1169)|Use CUDF's "UNBOUNDED" window boundaries for time-range queries.|
-|[#1204](https://github.com/NVIDIA/spark-rapids/pull/1204)|Avoid empty batches on columnar to row conversion|
-|[#1133](https://github.com/NVIDIA/spark-rapids/pull/1133)|Refactor batch coalesce to be based solely on batch data size|
-|[#1237](https://github.com/NVIDIA/spark-rapids/pull/1237)|In transport, limit pending transfer requests to fit within a bounce|
-|[#1232](https://github.com/NVIDIA/spark-rapids/pull/1232)|Move SortOrder creation to shims|
-|[#1068](https://github.com/NVIDIA/spark-rapids/pull/1068)|Write int96 to parquet|
-|[#1193](https://github.com/NVIDIA/spark-rapids/pull/1193)|Verify shuffle of decimal columns|
-|[#1180](https://github.com/NVIDIA/spark-rapids/pull/1180)|Remove batches if they are received after the iterator detects that t…|
-|[#1173](https://github.com/NVIDIA/spark-rapids/pull/1173)|Support relational operators for decimal type|
-|[#1220](https://github.com/NVIDIA/spark-rapids/pull/1220)|Support replacing ORC format when Hive is configured|
-|[#1219](https://github.com/NVIDIA/spark-rapids/pull/1219)|Upgrade to jucx 1.9.0|
-|[#1081](https://github.com/NVIDIA/spark-rapids/pull/1081)|Add option to upload benchmark summary JSON file|
-|[#1217](https://github.com/NVIDIA/spark-rapids/pull/1217)|Aggregate reductions in Complete mode should use updateExpressions|
-|[#1218](https://github.com/NVIDIA/spark-rapids/pull/1218)|Remove obsolete HiveStringType usage|
-|[#1214](https://github.com/NVIDIA/spark-rapids/pull/1214)|changelog update 2020-11-30. Trigger automerge check [skip ci]|
-|[#1210](https://github.com/NVIDIA/spark-rapids/pull/1210)|Support auto-merge for branch-0.4 [skip ci]|
-|[#1202](https://github.com/NVIDIA/spark-rapids/pull/1202)|Fix a bug with the support for java.lang.StringBuilder.append.|
-|[#1213](https://github.com/NVIDIA/spark-rapids/pull/1213)|Skip casting StringType to TimestampType for Spark 310|
-|[#1201](https://github.com/NVIDIA/spark-rapids/pull/1201)|Replace only window expressions on databricks.|
-|[#1208](https://github.com/NVIDIA/spark-rapids/pull/1208)|[BUG] Fix GHSL2020-239 [skip ci]|
-|[#1205](https://github.com/NVIDIA/spark-rapids/pull/1205)|Fix missing input bytes read metric for Parquet|
-|[#1206](https://github.com/NVIDIA/spark-rapids/pull/1206)|Update Spark 3.1 shim for ShuffleOrigin shuffle parameter|
-|[#1196](https://github.com/NVIDIA/spark-rapids/pull/1196)|Rename ShuffleCoalesceExec to GpuShuffleCoalesceExec|
-|[#1191](https://github.com/NVIDIA/spark-rapids/pull/1191)|Skip window array tests for databricks.|
-|[#1183](https://github.com/NVIDIA/spark-rapids/pull/1183)|Support for CalendarIntervalType and NullType|
-|[#1150](https://github.com/NVIDIA/spark-rapids/pull/1150)|udf spec|
-|[#1188](https://github.com/NVIDIA/spark-rapids/pull/1188)|Add in tests for parquet nested pruning support|
-|[#1189](https://github.com/NVIDIA/spark-rapids/pull/1189)|Enable NullType for First and Last in 3.0.1+|
-|[#1181](https://github.com/NVIDIA/spark-rapids/pull/1181)|Fix resource leaks in unit tests|
-|[#1186](https://github.com/NVIDIA/spark-rapids/pull/1186)|Fix compilation and scaladoc warnings|
-|[#1187](https://github.com/NVIDIA/spark-rapids/pull/1187)|Updated documentation for distinct count compatibility|
-|[#1182](https://github.com/NVIDIA/spark-rapids/pull/1182)|Close buffer catalog on device manager shutdown|
-|[#1137](https://github.com/NVIDIA/spark-rapids/pull/1137)|Let GpuWindowInPandas declare ArrayType supported.|
-|[#1176](https://github.com/NVIDIA/spark-rapids/pull/1176)|Add in support for null type|
-|[#1174](https://github.com/NVIDIA/spark-rapids/pull/1174)|Fix race condition in SerializeConcatHostBuffersDeserializeBatch|
-|[#1175](https://github.com/NVIDIA/spark-rapids/pull/1175)|Fix leaks seen in shuffle tests|
-|[#1138](https://github.com/NVIDIA/spark-rapids/pull/1138)|[REVIEW] Support decimal type for GpuProjectExec|
-|[#1162](https://github.com/NVIDIA/spark-rapids/pull/1162)|Set job descriptions in benchmark runner|
-|[#1172](https://github.com/NVIDIA/spark-rapids/pull/1172)|Revert "Fix race condition (#1165)"|
-|[#1060](https://github.com/NVIDIA/spark-rapids/pull/1060)|Show partition metrics for custom shuffler reader|
-|[#1152](https://github.com/NVIDIA/spark-rapids/pull/1152)|Add spark301db shim layer for WindowInPandas.|
-|[#1167](https://github.com/NVIDIA/spark-rapids/pull/1167)|Nulls out the dataframe if --gc-between-runs is set|
-|[#1165](https://github.com/NVIDIA/spark-rapids/pull/1165)|Fix race condition in SerializeConcatHostBuffersDeserializeBatch|
-|[#1163](https://github.com/NVIDIA/spark-rapids/pull/1163)|Add in support for GetStructField|
-|[#1166](https://github.com/NVIDIA/spark-rapids/pull/1166)|Fix the cast tests for 3.1.0+|
-|[#1159](https://github.com/NVIDIA/spark-rapids/pull/1159)|fix bug where 'now' had same value as 'today' for timestamps|
-|[#1161](https://github.com/NVIDIA/spark-rapids/pull/1161)|Fix nightly build pipeline failure.|
-|[#1160](https://github.com/NVIDIA/spark-rapids/pull/1160)|Fix some performance problems with columnar to columnar conversion|
-|[#1105](https://github.com/NVIDIA/spark-rapids/pull/1105)|[REVIEW] Change ColumnViewAccess usage to work with ColumnView|
-|[#1148](https://github.com/NVIDIA/spark-rapids/pull/1148)|Add in tests for Maps and extend map support where possible|
-|[#1154](https://github.com/NVIDIA/spark-rapids/pull/1154)|Mark test as xfail until we can get a fix in|
-|[#1113](https://github.com/NVIDIA/spark-rapids/pull/1113)|Support unix_timestamp on GPU for subset of formats|
-|[#1156](https://github.com/NVIDIA/spark-rapids/pull/1156)|Fix warning introduced in iterator suite|
-|[#1095](https://github.com/NVIDIA/spark-rapids/pull/1095)|Dependency info|
-|[#1145](https://github.com/NVIDIA/spark-rapids/pull/1145)|Remove support for databricks 7.0 runtime - shim spark300db|
-|[#1147](https://github.com/NVIDIA/spark-rapids/pull/1147)|Change the assert to require for handling TIMESTAMP_MILLIS in isDateTimeRebaseNeeded |
-|[#1132](https://github.com/NVIDIA/spark-rapids/pull/1132)|Add in basic support to read structs from parquet|
-|[#1121](https://github.com/NVIDIA/spark-rapids/pull/1121)|Shuffle/better error handling|
-|[#1134](https://github.com/NVIDIA/spark-rapids/pull/1134)|Support saveAsTable for writing orc and parquet|
-|[#1124](https://github.com/NVIDIA/spark-rapids/pull/1124)|Add shim layers for GpuWindowInPandasExec.|
-|[#1131](https://github.com/NVIDIA/spark-rapids/pull/1131)|Add in some basic support for Structs|
-|[#1127](https://github.com/NVIDIA/spark-rapids/pull/1127)|Add in basic support for reading lists from parquet|
-|[#1129](https://github.com/NVIDIA/spark-rapids/pull/1129)|Fix resource leaks with new shuffle optimization|
-|[#1116](https://github.com/NVIDIA/spark-rapids/pull/1116)|Optimize normal shuffle by coalescing smaller batches on host|
-|[#1102](https://github.com/NVIDIA/spark-rapids/pull/1102)|Auto-register UDF extention when main plugin is set|
-|[#1108](https://github.com/NVIDIA/spark-rapids/pull/1108)|Remove integration test pipelines on NGCC|
-|[#1123](https://github.com/NVIDIA/spark-rapids/pull/1123)|Mark Pandas udf over window tests as xfail on databricks until they can be fixed|
-|[#1120](https://github.com/NVIDIA/spark-rapids/pull/1120)|Add in support for filtering ArrayType|
-|[#1080](https://github.com/NVIDIA/spark-rapids/pull/1080)|Support for CalendarIntervalType and NullType for ParquetCachedSerializer|
-|[#994](https://github.com/NVIDIA/spark-rapids/pull/994)|Packs bounce buffers for highly partitioned shuffles|
-|[#1112](https://github.com/NVIDIA/spark-rapids/pull/1112)|Remove bad config from pytest setup|
-|[#1107](https://github.com/NVIDIA/spark-rapids/pull/1107)|closeOnExcept -> withResources in MetaUtils|
-|[#1104](https://github.com/NVIDIA/spark-rapids/pull/1104)|Support lists to/from the GPU|
-|[#1106](https://github.com/NVIDIA/spark-rapids/pull/1106)|Improve mechanism for expected exceptions in tests|
-|[#1069](https://github.com/NVIDIA/spark-rapids/pull/1069)|Accelerate the data transfer between JVM and Python for the plan 'GpuWindowInPandasExec'|
-|[#1099](https://github.com/NVIDIA/spark-rapids/pull/1099)|Update how we deal with type checking|
-|[#1077](https://github.com/NVIDIA/spark-rapids/pull/1077)|Improve AQE transitions for shuffle and coalesce batches|
-|[#1097](https://github.com/NVIDIA/spark-rapids/pull/1097)|Cleanup some instances of excess closure serialization|
-|[#1090](https://github.com/NVIDIA/spark-rapids/pull/1090)|Fix the integration build|
-|[#1086](https://github.com/NVIDIA/spark-rapids/pull/1086)|Speed up test performance using pytest-xdist|
-|[#1084](https://github.com/NVIDIA/spark-rapids/pull/1084)|Avoid issues where more scalars that expected show up in an expression|
-|[#1076](https://github.com/NVIDIA/spark-rapids/pull/1076)|[FEA] Support Databricks 7.3 LTS Runtime|
-|[#1083](https://github.com/NVIDIA/spark-rapids/pull/1083)|Revert "Get cudf/spark dependency from the correct .m2 dir"|
-|[#1062](https://github.com/NVIDIA/spark-rapids/pull/1062)|Get cudf/spark dependency from the correct .m2 dir|
-|[#1078](https://github.com/NVIDIA/spark-rapids/pull/1078)|Another round of fixes for mapping of DataType to DType|
-|[#1066](https://github.com/NVIDIA/spark-rapids/pull/1066)|More fixes for conversion to ColumnarBatch|
-|[#1029](https://github.com/NVIDIA/spark-rapids/pull/1029)|BenchmarkRunner should produce JSON summary file even when queries fail|
-|[#1055](https://github.com/NVIDIA/spark-rapids/pull/1055)|Fix build warnings|
-|[#1064](https://github.com/NVIDIA/spark-rapids/pull/1064)|Use array instead of List for from(Table, DataType)|
-|[#1057](https://github.com/NVIDIA/spark-rapids/pull/1057)|Fix empty table broadcast requiring a GPU on driver node|
-|[#1047](https://github.com/NVIDIA/spark-rapids/pull/1047)|Sanity checks for cudf jar mismatch|
-|[#1044](https://github.com/NVIDIA/spark-rapids/pull/1044)|Accelerated row to columnar and columnar to row transitions|
-|[#1056](https://github.com/NVIDIA/spark-rapids/pull/1056)|Add query number to Spark app name when running benchmarks|
-|[#1054](https://github.com/NVIDIA/spark-rapids/pull/1054)|Log total RMM allocated on GPU OOM|
-|[#1053](https://github.com/NVIDIA/spark-rapids/pull/1053)|Remove isGpuBroadcastNestedLoopJoin from shims|
-|[#1052](https://github.com/NVIDIA/spark-rapids/pull/1052)|Allow for GPUCoalesceBatch to deal with Map|
-|[#1051](https://github.com/NVIDIA/spark-rapids/pull/1051)|Add simple retry for URM dependencies [skip ci]|
-|[#1046](https://github.com/NVIDIA/spark-rapids/pull/1046)|Fix broken links|
-|[#1017](https://github.com/NVIDIA/spark-rapids/pull/1017)|Log whether PTDS is enabled|
-|[#1040](https://github.com/NVIDIA/spark-rapids/pull/1040)|Update to cudf 0.17-SNAPSHOT and fix tests|
-|[#1042](https://github.com/NVIDIA/spark-rapids/pull/1042)|Fix inconsistencies in AQE support for broadcast joins|
-|[#1037](https://github.com/NVIDIA/spark-rapids/pull/1037)|Add in support for the SQL functions Least and Greatest|
-|[#1036](https://github.com/NVIDIA/spark-rapids/pull/1036)|Increase number of retries when waiting for databricks cluster|
-|[#1034](https://github.com/NVIDIA/spark-rapids/pull/1034)|[BUG] To honor spark.rapids.memory.gpu.pool=NONE|
-|[#854](https://github.com/NVIDIA/spark-rapids/pull/854)|Arbitrary function call in UDF|
-|[#1028](https://github.com/NVIDIA/spark-rapids/pull/1028)|Update to cudf-0.16|
-|[#1023](https://github.com/NVIDIA/spark-rapids/pull/1023)|Add --gc-between-run flag for TPC* benchmarks.|
-|[#1001](https://github.com/NVIDIA/spark-rapids/pull/1001)|ColumnarBatch to CachedBatch and back|
-|[#990](https://github.com/NVIDIA/spark-rapids/pull/990)|Parquet coalesce file reader for local filesystems|
-|[#1014](https://github.com/NVIDIA/spark-rapids/pull/1014)|Add --append-dat flag for TPC-DS benchmark|
-|[#991](https://github.com/NVIDIA/spark-rapids/pull/991)|Updated GCP Dataproc Mortgage-ETL-GPU.ipynb|
-|[#886](https://github.com/NVIDIA/spark-rapids/pull/886)|Spark BinaryType and cast to BinaryType|
-|[#1016](https://github.com/NVIDIA/spark-rapids/pull/1016)|Change Hash Aggregate to allow pass-through on MapType|
-|[#984](https://github.com/NVIDIA/spark-rapids/pull/984)|Add support for MapType in selected operators |
-|[#1012](https://github.com/NVIDIA/spark-rapids/pull/1012)|Update for new position parameter in Spark 3.1.0 RegExpReplace|
-|[#995](https://github.com/NVIDIA/spark-rapids/pull/995)|Add shim for EMR 3.0.1 and EMR 3.0.1-SNAPSHOT|
-|[#998](https://github.com/NVIDIA/spark-rapids/pull/998)|Update benchmark automation script|
-|[#1000](https://github.com/NVIDIA/spark-rapids/pull/1000)|Always use RAPIDS shuffle when running TPCH and Mortgage tests|
-|[#981](https://github.com/NVIDIA/spark-rapids/pull/981)|Change databricks build to dynamically create a cluster|
-|[#986](https://github.com/NVIDIA/spark-rapids/pull/986)|Fix missing dataSize metric when using RAPIDS shuffle|
-|[#914](https://github.com/NVIDIA/spark-rapids/pull/914)|Write InternalRow to CachedBatch|
-|[#934](https://github.com/NVIDIA/spark-rapids/pull/934)|Iterator to make it easier to work with a window of blocks in the RAPIDS shuffle|
-|[#992](https://github.com/NVIDIA/spark-rapids/pull/992)|Skip post-clean if aborted before the image build stage in pre-merge [skip ci]|
-|[#988](https://github.com/NVIDIA/spark-rapids/pull/988)|Change in Spark caused the 3.1.0 CI to fail|
-|[#983](https://github.com/NVIDIA/spark-rapids/pull/983)|clean jenkins file for premerge on NGCC|
-|[#964](https://github.com/NVIDIA/spark-rapids/pull/964)|Refactor TPC benchmarks to reduce duplicate code|
-|[#978](https://github.com/NVIDIA/spark-rapids/pull/978)|Enable scalastyle checks for udf-compiler module|
-|[#949](https://github.com/NVIDIA/spark-rapids/pull/949)|Fix GpuWindowExec to work with a CPU SortExec|
-|[#973](https://github.com/NVIDIA/spark-rapids/pull/973)|Stop reporting totalTime metric for GpuShuffleExchangeExec|
-|[#968](https://github.com/NVIDIA/spark-rapids/pull/968)|XFail pos_explode tests until final fix can be put in|
-|[#970](https://github.com/NVIDIA/spark-rapids/pull/970)|Add legacy config to clear active Spark 3.1.0 session in tests|
-|[#918](https://github.com/NVIDIA/spark-rapids/pull/918)|Benchmark runner script|
-|[#915](https://github.com/NVIDIA/spark-rapids/pull/915)|Add option to control number of partitions when converting from CSV to Parquet|
-|[#944](https://github.com/NVIDIA/spark-rapids/pull/944)|Fix some issues with non-determinism|
-|[#935](https://github.com/NVIDIA/spark-rapids/pull/935)|Add in support/tests for a window count on a column|
-|[#940](https://github.com/NVIDIA/spark-rapids/pull/940)|Fix closeOnExcept suppressed exception handling|
-|[#942](https://github.com/NVIDIA/spark-rapids/pull/942)|fix github action env setup [skip ci]|
-|[#933](https://github.com/NVIDIA/spark-rapids/pull/933)|Update first/last tests to avoid non-determinisim and ordering differences|
-|[#931](https://github.com/NVIDIA/spark-rapids/pull/931)|Fix checking for nullable columns in window range query|
-|[#924](https://github.com/NVIDIA/spark-rapids/pull/924)|Benchmark guide update for command-line interface / spark-submit|
-|[#926](https://github.com/NVIDIA/spark-rapids/pull/926)|Move pandas_udf functions into the tests functions|
-|[#929](https://github.com/NVIDIA/spark-rapids/pull/929)|Pick a default tableId to use that is non 0 so that flatbuffers allow…|
-|[#928](https://github.com/NVIDIA/spark-rapids/pull/928)|Fix RapidsBufferStore NPE when no spillable buffers are available|
-|[#820](https://github.com/NVIDIA/spark-rapids/pull/820)|Benchmarking guide|
-|[#859](https://github.com/NVIDIA/spark-rapids/pull/859)|Compare partitioned files in order|
-|[#916](https://github.com/NVIDIA/spark-rapids/pull/916)|create new sparkContext explicitly in CPU notebook|
-|[#917](https://github.com/NVIDIA/spark-rapids/pull/917)|create new SparkContext in GPU notebook explicitly.|
-|[#919](https://github.com/NVIDIA/spark-rapids/pull/919)|Add label benchmark to performance subsection in changelog|
-|[#850](https://github.com/NVIDIA/spark-rapids/pull/850)| Add in basic support for lead/lag|
-|[#843](https://github.com/NVIDIA/spark-rapids/pull/843)|[REVIEW] Cache plugin to handle reading CachedBatch to an InternalRow|
-|[#904](https://github.com/NVIDIA/spark-rapids/pull/904)|Add command-line argument for benchmark result filename|
-|[#909](https://github.com/NVIDIA/spark-rapids/pull/909)|GCP preview version image name update|
-|[#903](https://github.com/NVIDIA/spark-rapids/pull/903)|update getting-started-gcp.md with new component list|
-|[#900](https://github.com/NVIDIA/spark-rapids/pull/900)|Turn off CollectLimitExec replacement by default|
-|[#907](https://github.com/NVIDIA/spark-rapids/pull/907)|remove configs from databricks that shouldn't be used by default|
-|[#893](https://github.com/NVIDIA/spark-rapids/pull/893)|Fix rounding error when casting timestamp to string for timestamps before 1970|
-|[#899](https://github.com/NVIDIA/spark-rapids/pull/899)|Mark reduction corner case tests as xfail on databricks until they can be fixed|
-|[#894](https://github.com/NVIDIA/spark-rapids/pull/894)|Replace whole-buffer slicing with direct refcounting|
-|[#891](https://github.com/NVIDIA/spark-rapids/pull/891)|Add config to dump heap on GPU OOM|
-|[#890](https://github.com/NVIDIA/spark-rapids/pull/890)|Clean up CoalesceBatch to use withResource|
-|[#892](https://github.com/NVIDIA/spark-rapids/pull/892)|Only manifest the current batch in cached block shuffle read iterator|
-|[#871](https://github.com/NVIDIA/spark-rapids/pull/871)|Add support for using the arena allocator|
-|[#889](https://github.com/NVIDIA/spark-rapids/pull/889)|Fix crash on scalar only orderby|
-|[#879](https://github.com/NVIDIA/spark-rapids/pull/879)|Update SpillableColumnarBatch to remove buffer from catalog on close|
-|[#888](https://github.com/NVIDIA/spark-rapids/pull/888)|Shrink detect scope to compile only [skip ci]|
-|[#885](https://github.com/NVIDIA/spark-rapids/pull/885)|[BUG] fix IT dockerfile arguments [skip ci]|
-|[#883](https://github.com/NVIDIA/spark-rapids/pull/883)|[BUG] fix IT dockerfile args ordering [skip ci]|
-|[#875](https://github.com/NVIDIA/spark-rapids/pull/875)|fix the non-consistency for `spark.rapids.sql.format.parquet.multiThreadedRead` in RapidsConf.scala|
-|[#862](https://github.com/NVIDIA/spark-rapids/pull/862)|Migrate nightly&integration pipelines to blossom [skip ci]|
-|[#872](https://github.com/NVIDIA/spark-rapids/pull/872)|Ensure that receive-side batches use GpuColumnVectorFromBuffer to avoid|
-|[#833](https://github.com/NVIDIA/spark-rapids/pull/833)|Add nvcomp LZ4 codec support|
-|[#870](https://github.com/NVIDIA/spark-rapids/pull/870)|Cleaned up tests and documentation for csv timestamp parsing|
-|[#823](https://github.com/NVIDIA/spark-rapids/pull/823)|Add command-line interface for TPC-* for use with 
spark-submit| -|[#856](https://github.com/NVIDIA/spark-rapids/pull/856)|Move GpuWindowInPandasExec in shims layers| -|[#756](https://github.com/NVIDIA/spark-rapids/pull/756)|Add stream-time metric| -|[#832](https://github.com/NVIDIA/spark-rapids/pull/832)|Skip pandas tests if pandas cannot be found| -|[#841](https://github.com/NVIDIA/spark-rapids/pull/841)|Fix a hanging issue when processing empty data.| -|[#840](https://github.com/NVIDIA/spark-rapids/pull/840)|[REVIEW] Fixed failing cache tests| -|[#848](https://github.com/NVIDIA/spark-rapids/pull/848)|Update task memory and disk spill metrics when buffer store spills| -|[#851](https://github.com/NVIDIA/spark-rapids/pull/851)|Use contiguous table when deserializing columnar batch| -|[#857](https://github.com/NVIDIA/spark-rapids/pull/857)|fix pvc scheduling issue| -|[#853](https://github.com/NVIDIA/spark-rapids/pull/853)|Remove nodeAffinity from premerge pipeline| -|[#796](https://github.com/NVIDIA/spark-rapids/pull/796)|Record spark plan SQL metrics to JSON when running benchmarks| -|[#781](https://github.com/NVIDIA/spark-rapids/pull/781)|Add AQE unit tests| -|[#824](https://github.com/NVIDIA/spark-rapids/pull/824)|Skip cudf_udf test by default| -|[#839](https://github.com/NVIDIA/spark-rapids/pull/839)|First/Last reduction and cleanup of agg APIs| -|[#827](https://github.com/NVIDIA/spark-rapids/pull/827)|Add Spark 3.0 EMR Shim layer | -|[#816](https://github.com/NVIDIA/spark-rapids/pull/816)|[BUG] fix nightly is timing out| -|[#782](https://github.com/NVIDIA/spark-rapids/pull/782)|Benchmark utility to perform diff of output from benchmark runs, allowing for precision differences| -|[#813](https://github.com/NVIDIA/spark-rapids/pull/813)|Revert "Enable tests in udf_cudf_test.py"| -|[#788](https://github.com/NVIDIA/spark-rapids/pull/788)|[FEA] Persist workspace data on PVC for premerge| -|[#805](https://github.com/NVIDIA/spark-rapids/pull/805)|[FEA] nightly build trigger both IT on spark 300 and 301| -|[#797](https://github.com/NVIDIA/spark-rapids/pull/797)|Allow host spill store to fit a buffer larger than configured max size| -|[#807](https://github.com/NVIDIA/spark-rapids/pull/807)|Deploy integration-tests javadoc and sources| -|[#777](https://github.com/NVIDIA/spark-rapids/pull/777)|Enable tests in udf_cudf_test.py| -|[#790](https://github.com/NVIDIA/spark-rapids/pull/790)|CI: Update cudf python to 0.16 nightly| -|[#772](https://github.com/NVIDIA/spark-rapids/pull/772)|Add support for empty array construction.| -|[#783](https://github.com/NVIDIA/spark-rapids/pull/783)|Improved GpuArrowEvalPythonExec| -|[#771](https://github.com/NVIDIA/spark-rapids/pull/771)|Various improvements to benchmarks| -|[#763](https://github.com/NVIDIA/spark-rapids/pull/763)|[REVIEW] Allow CoalesceBatch to spill data that is not in active use| -|[#727](https://github.com/NVIDIA/spark-rapids/pull/727)|Update cudf dependency to 0.16-SNAPSHOT| -|[#726](https://github.com/NVIDIA/spark-rapids/pull/726)|parquet writer support for TIMESTAMP_MILLIS| -|[#674](https://github.com/NVIDIA/spark-rapids/pull/674)|Unit test for GPU exchange re-use with AQE| -|[#723](https://github.com/NVIDIA/spark-rapids/pull/723)|Update code coverage to find source files in new places| -|[#766](https://github.com/NVIDIA/spark-rapids/pull/766)|Update the integration Dockerfile to reduce the image size| -|[#762](https://github.com/NVIDIA/spark-rapids/pull/762)|Fixing conflicts in branch-0.3| -|[#738](https://github.com/NVIDIA/spark-rapids/pull/738)|[auto-merge] branch-0.2 to branch-0.3 - resolve 
conflict| -|[#722](https://github.com/NVIDIA/spark-rapids/pull/722)|Initial code changes to support spilling outside of shuffle| -|[#693](https://github.com/NVIDIA/spark-rapids/pull/693)|Update jenkins files for 0.3| -|[#692](https://github.com/NVIDIA/spark-rapids/pull/692)|Merge shims dependency to spark-3.0.1 into branch-0.3| -|[#690](https://github.com/NVIDIA/spark-rapids/pull/690)|Update the version to 0.3.0-SNAPSHOT| - -## Release 0.2 - -### Features -||| -|:---|:---| -|[#696](https://github.com/NVIDIA/spark-rapids/issues/696)|[FEA] run integration tests against SPARK-3.0.1| -|[#455](https://github.com/NVIDIA/spark-rapids/issues/455)|[FEA] Support UCX shuffle with optimized AQE| -|[#510](https://github.com/NVIDIA/spark-rapids/issues/510)|[FEA] Investigate libcudf features needed to support struct schema pruning during loads| -|[#541](https://github.com/NVIDIA/spark-rapids/issues/541)|[FEA] Scala UDF:Support for null Value operands| -|[#542](https://github.com/NVIDIA/spark-rapids/issues/542)|[FEA] Scala UDF: Support for Date and Time | -|[#499](https://github.com/NVIDIA/spark-rapids/issues/499)|[FEA] disable any kind of warnings about ExecutedCommandExec not being on the GPU| -|[#540](https://github.com/NVIDIA/spark-rapids/issues/540)|[FEA] Scala UDF: Support for String replaceFirst()| -|[#340](https://github.com/NVIDIA/spark-rapids/issues/340)|[FEA] widen the rendered Jekyll pages| -|[#602](https://github.com/NVIDIA/spark-rapids/issues/602)|[FEA] don't release with any -SNAPSHOT dependencies| -|[#579](https://github.com/NVIDIA/spark-rapids/issues/579)|[FEA] Auto-merge between branches| -|[#515](https://github.com/NVIDIA/spark-rapids/issues/515)|[FEA] Write tests for AQE skewed join optimization| -|[#452](https://github.com/NVIDIA/spark-rapids/issues/452)|[FEA] Update HashSortOptimizerSuite to work with AQE| -|[#454](https://github.com/NVIDIA/spark-rapids/issues/454)|[FEA] Update GpuCoalesceBatchesSuite to work with AQE enabled| -|[#354](https://github.com/NVIDIA/spark-rapids/issues/354)|[FEA]Spark 3.1 FileSourceScanExec adds parameter optionalNumCoalescedBuckets| -|[#566](https://github.com/NVIDIA/spark-rapids/issues/566)|[FEA] Add support for StringSplit with an array index.| -|[#524](https://github.com/NVIDIA/spark-rapids/issues/524)|[FEA] Add GPU specific metrics to GpuFileSourceScanExec| -|[#494](https://github.com/NVIDIA/spark-rapids/issues/494)|[FEA] Add some AQE-specific tests to the PySpark test suite| -|[#146](https://github.com/NVIDIA/spark-rapids/issues/146)|[FEA] Python tests should support running with Adaptive Query Execution enabled| -|[#465](https://github.com/NVIDIA/spark-rapids/issues/465)|[FEA] Audit: Update script to audit multiple versions of Spark | -|[#488](https://github.com/NVIDIA/spark-rapids/issues/488)|[FEA] Ability to limit total GPU memory used| -|[#70](https://github.com/NVIDIA/spark-rapids/issues/70)|[FEA] Support StringSplit| -|[#403](https://github.com/NVIDIA/spark-rapids/issues/403)|[FEA] Add in support for GetArrayItem| -|[#493](https://github.com/NVIDIA/spark-rapids/issues/493)|[FEA] Implement shuffle optimization when AQE is enabled| -|[#500](https://github.com/NVIDIA/spark-rapids/issues/500)|[FEA] Add maven profiles for testing with AQE on or off| -|[#471](https://github.com/NVIDIA/spark-rapids/issues/471)|[FEA] create a formal process for updating the github-pages branch| -|[#233](https://github.com/NVIDIA/spark-rapids/issues/233)|[FEA] Audit DataWritingCommandExec | -|[#240](https://github.com/NVIDIA/spark-rapids/issues/240)|[FEA] Audit Api 
validation script follow on - Optimize StringToTypeTag | -|[#388](https://github.com/NVIDIA/spark-rapids/issues/388)|[FEA] Audit WindowExec| -|[#425](https://github.com/NVIDIA/spark-rapids/issues/425)|[FEA] Add tests for configs in BatchScan Readers| -|[#453](https://github.com/NVIDIA/spark-rapids/issues/453)|[FEA] Update HashAggregatesSuite to work with AQE| -|[#184](https://github.com/NVIDIA/spark-rapids/issues/184)|[FEA] Enable NoScalaDoc scalastyle rule| -|[#438](https://github.com/NVIDIA/spark-rapids/issues/438)|[FEA] Enable StringLPad| -|[#232](https://github.com/NVIDIA/spark-rapids/issues/232)|[FEA] Audit SortExec | -|[#236](https://github.com/NVIDIA/spark-rapids/issues/236)|[FEA] Audit ShuffleExchangeExec | -|[#355](https://github.com/NVIDIA/spark-rapids/issues/355)|[FEA] Support Multiple Spark versions in the same jar| -|[#385](https://github.com/NVIDIA/spark-rapids/issues/385)|[FEA] Support RangeExec on the GPU| -|[#317](https://github.com/NVIDIA/spark-rapids/issues/317)|[FEA] Write test wrapper to run SQL queries via pyspark| -|[#235](https://github.com/NVIDIA/spark-rapids/issues/235)|[FEA] Audit BroadcastExchangeExec| -|[#234](https://github.com/NVIDIA/spark-rapids/issues/234)|[FEA] Audit BatchScanExec| -|[#238](https://github.com/NVIDIA/spark-rapids/issues/238)|[FEA] Audit ShuffledHashJoinExec | -|[#237](https://github.com/NVIDIA/spark-rapids/issues/237)|[FEA] Audit BroadcastHashJoinExec | -|[#316](https://github.com/NVIDIA/spark-rapids/issues/316)|[FEA] Add some basic Dataframe tests for CoalesceExec| -|[#145](https://github.com/NVIDIA/spark-rapids/issues/145)|[FEA] Scala tests should support running with Adaptive Query Execution enabled| -|[#231](https://github.com/NVIDIA/spark-rapids/issues/231)|[FEA] Audit ProjectExec | -|[#229](https://github.com/NVIDIA/spark-rapids/issues/229)|[FEA] Audit FileSourceScanExec | - -### Performance -||| -|:---|:---| -|[#326](https://github.com/NVIDIA/spark-rapids/issues/326)|[DISCUSS] Shuffle read-side error handling| -|[#601](https://github.com/NVIDIA/spark-rapids/issues/601)|[FEA] Optimize unnecessary sorts when replacing SortAggregate| -|[#333](https://github.com/NVIDIA/spark-rapids/issues/333)|[FEA] Better handling of reading lots of small Parquet files| -|[#511](https://github.com/NVIDIA/spark-rapids/issues/511)|[FEA] Connect shuffle table compression to shuffle exec metrics| -|[#15](https://github.com/NVIDIA/spark-rapids/issues/15)|[FEA] Multiple threads sharing the same GPU| -|[#272](https://github.com/NVIDIA/spark-rapids/issues/272)|[DOC] Getting started guide for UCX shuffle| - -### Bugs Fixed -||| -|:---|:---| -|[#780](https://github.com/NVIDIA/spark-rapids/issues/780)|[BUG] Inner Join dropping data with bucketed Table input| -|[#569](https://github.com/NVIDIA/spark-rapids/issues/569)|[BUG] left_semi_join operation is abnormal and serious time-consuming| -|[#744](https://github.com/NVIDIA/spark-rapids/issues/744)|[BUG] TPC-DS query 6 now produces incorrect results.| -|[#718](https://github.com/NVIDIA/spark-rapids/issues/718)|[BUG] GpuBroadcastHashJoinExec ArrayIndexOutOfBoundsException| -|[#698](https://github.com/NVIDIA/spark-rapids/issues/698)|[BUG] batch coalesce can fail to appear between columnar shuffle and subsequent columnar operation| -|[#658](https://github.com/NVIDIA/spark-rapids/issues/658)|[BUG] GpuCoalesceBatches collectTime metric can be underreported| -|[#59](https://github.com/NVIDIA/spark-rapids/issues/59)|[BUG] enable tests for string literals in a select| 
-|[#486](https://github.com/NVIDIA/spark-rapids/issues/486)|[BUG] GpuWindowExec does not implement requiredChildOrdering| -|[#631](https://github.com/NVIDIA/spark-rapids/issues/631)|[BUG] Rows are dropped when AQE is enabled in some cases| -|[#671](https://github.com/NVIDIA/spark-rapids/issues/671)|[BUG] Databricks hash_aggregate_test fails trying to canonicalize a WrappedAggFunction| -|[#218](https://github.com/NVIDIA/spark-rapids/issues/218)|[BUG] Window function COUNT(x) includes null-values, when it shouldn't| -|[#153](https://github.com/NVIDIA/spark-rapids/issues/153)|[BUG] Incorrect output from partial-only hash aggregates with multiple distincts and non-distinct functions| -|[#656](https://github.com/NVIDIA/spark-rapids/issues/656)|[BUG] integration tests produce hive metadata files| -|[#607](https://github.com/NVIDIA/spark-rapids/issues/607)|[BUG] Fix misleading "cannot run on GPU" warnings when AQE is enabled| -|[#630](https://github.com/NVIDIA/spark-rapids/issues/630)|[BUG] GpuCustomShuffleReader metrics always show zero rows/batches output| -|[#643](https://github.com/NVIDIA/spark-rapids/issues/643)|[BUG] race condition while registering a buffer and spilling at the same time| -|[#606](https://github.com/NVIDIA/spark-rapids/issues/606)|[BUG] Multiple scans for same data source with TPC-DS query59 with delta format| -|[#626](https://github.com/NVIDIA/spark-rapids/issues/626)|[BUG] parquet_test showing leaked memory buffer| -|[#155](https://github.com/NVIDIA/spark-rapids/issues/155)|[BUG] Incorrect output from averages with filters in partial only mode| -|[#277](https://github.com/NVIDIA/spark-rapids/issues/277)|[BUG] HashAggregateSuite failure when AQE is enabled| -|[#276](https://github.com/NVIDIA/spark-rapids/issues/276)|[BUG] GpuCoalesceBatchSuite failure when AQE is enabled| -|[#598](https://github.com/NVIDIA/spark-rapids/issues/598)|[BUG] Non-deterministic output from MapOutputTracker.getStatistics() with AQE on GPU| -|[#192](https://github.com/NVIDIA/spark-rapids/issues/192)|[BUG] test_read_merge_schema fails on Databricks| -|[#341](https://github.com/NVIDIA/spark-rapids/issues/341)|[BUG] Document compression formats for readers/writers| -|[#587](https://github.com/NVIDIA/spark-rapids/issues/587)|[BUG] Spark3.1 changed FileScan which means or GpuScans need to be added to shim layer| -|[#362](https://github.com/NVIDIA/spark-rapids/issues/362)|[BUG] Implement getReaderForRange in the RapidsShuffleManager| -|[#528](https://github.com/NVIDIA/spark-rapids/issues/528)|[BUG] HashAggregateSuite "Avg Distinct with filter" no longer valid when testing against Spark 3.1.0| -|[#416](https://github.com/NVIDIA/spark-rapids/issues/416)|[BUG] Fix Spark 3.1.0 integration tests| -|[#556](https://github.com/NVIDIA/spark-rapids/issues/556)|[BUG] NPE when removing shuffle| -|[#553](https://github.com/NVIDIA/spark-rapids/issues/553)|[BUG] GpuColumnVector build warnings from raw type access| -|[#492](https://github.com/NVIDIA/spark-rapids/issues/492)|[BUG] Re-enable AQE integration tests| -|[#275](https://github.com/NVIDIA/spark-rapids/issues/275)|[BUG] TpchLike query 2 fails when AQE is enabled| -|[#508](https://github.com/NVIDIA/spark-rapids/issues/508)|[BUG] GpuUnion publishes metrics on the UI that are all 0| -|[#269](https://github.com/NVIDIA/spark-rapids/issues/269)|Needed to add `--conf spark.driver.extraClassPath=` | -|[#473](https://github.com/NVIDIA/spark-rapids/issues/473)|[BUG] PartMerge:countDistinct:sum fails sporadically| 
-|[#531](https://github.com/NVIDIA/spark-rapids/issues/531)|[BUG] Temporary RMM workaround needs to be removed| -|[#532](https://github.com/NVIDIA/spark-rapids/issues/532)|[BUG] NPE when enabling shuffle manager| -|[#525](https://github.com/NVIDIA/spark-rapids/issues/525)|[BUG] GpuFilterExec reports incorrect nullability of output in some cases| -|[#483](https://github.com/NVIDIA/spark-rapids/issues/483)|[BUG] Multiple scans for the same parquet data source| -|[#382](https://github.com/NVIDIA/spark-rapids/issues/382)|[BUG] Spark3.1 StringFallbackSuite regexp_replace null cpu fall back test fails.| -|[#489](https://github.com/NVIDIA/spark-rapids/issues/489)|[FEA] Fix Spark 3.1 GpuHashJoin since it now requires CodegenSupport| -|[#441](https://github.com/NVIDIA/spark-rapids/issues/441)|[BUG] test_broadcast_nested_loop_join_special_case fails on databricks| -|[#347](https://github.com/NVIDIA/spark-rapids/issues/347)|[BUG] Failed to read Parquet file generated by GPU-enabled Spark.| -|[#433](https://github.com/NVIDIA/spark-rapids/issues/433)|`InSet` operator produces an error for Strings| -|[#144](https://github.com/NVIDIA/spark-rapids/issues/144)|[BUG] spark.sql.legacy.parquet.datetimeRebaseModeInWrite is ignored| -|[#323](https://github.com/NVIDIA/spark-rapids/issues/323)|[BUG] GpuBroadcastNestedLoopJoinExec can fail if there are no columns| -|[#356](https://github.com/NVIDIA/spark-rapids/issues/356)|[BUG] Integration cache test for BroadcastNestedLoopJoin failure| -|[#280](https://github.com/NVIDIA/spark-rapids/issues/280)|[BUG] Full Outer Join does not work on nullable keys| -|[#149](https://github.com/NVIDIA/spark-rapids/issues/149)|[BUG] Spark driver fails to load native libs when running on node without CUDA| - -### PRs -||| -|:---|:---| -|[#826](https://github.com/NVIDIA/spark-rapids/pull/826)|Fix link to cudf-0.15-cuda11.jar| -|[#815](https://github.com/NVIDIA/spark-rapids/pull/815)|Update documentation for Scala UDFs in 0.2 since you need two things| -|[#802](https://github.com/NVIDIA/spark-rapids/pull/802)|Update 0.2 CHANGELOG| -|[#793](https://github.com/NVIDIA/spark-rapids/pull/793)|Update Jenkins scripts for release| -|[#798](https://github.com/NVIDIA/spark-rapids/pull/798)|Fix shims provider override config not being seen by executors| -|[#785](https://github.com/NVIDIA/spark-rapids/pull/785)|Make shuffle run on CPU if we do a join where we read from bucketed table| -|[#765](https://github.com/NVIDIA/spark-rapids/pull/765)|Add config to override shims provider class| -|[#759](https://github.com/NVIDIA/spark-rapids/pull/759)|Add CHANGELOG for release 0.2| -|[#758](https://github.com/NVIDIA/spark-rapids/pull/758)|Skip the udf test fails periodically.| -|[#752](https://github.com/NVIDIA/spark-rapids/pull/752)|Fix snapshot plugin jar version in docs| -|[#751](https://github.com/NVIDIA/spark-rapids/pull/751)|Correct the channel for cudf installation| -|[#754](https://github.com/NVIDIA/spark-rapids/pull/754)|Filter nulls from joins where possible to improve performance| -|[#732](https://github.com/NVIDIA/spark-rapids/pull/732)|Add a timeout for RapidsShuffleIterator to prevent jobs to hang infin…| -|[#637](https://github.com/NVIDIA/spark-rapids/pull/637)|Documentation changes for 0.2 release | -|[#747](https://github.com/NVIDIA/spark-rapids/pull/747)|Disable udf tests that fail periodically| -|[#745](https://github.com/NVIDIA/spark-rapids/pull/745)|Revert Null Join Filter| -|[#741](https://github.com/NVIDIA/spark-rapids/pull/741)|Fix issue with parquet partitioned reads| 
-|[#733](https://github.com/NVIDIA/spark-rapids/pull/733)|Remove GPU Types from github| -|[#720](https://github.com/NVIDIA/spark-rapids/pull/720)|Stop removing GpuCoalesceBatches from non-AQE queries when AQE is enabled| -|[#729](https://github.com/NVIDIA/spark-rapids/pull/729)|Fix collect time metric in CoalesceBatches| -|[#640](https://github.com/NVIDIA/spark-rapids/pull/640)|Support running Pandas UDFs on GPUs in Python processes.| -|[#721](https://github.com/NVIDIA/spark-rapids/pull/721)|Add some more checks to databricks build scripts| -|[#714](https://github.com/NVIDIA/spark-rapids/pull/714)|Move spark 3.0.1-shims out of snapshot-shims| -|[#711](https://github.com/NVIDIA/spark-rapids/pull/711)|fix blossom checkout repo| -|[#709](https://github.com/NVIDIA/spark-rapids/pull/709)|[BUG] fix unexpected indentation issue in blossom yml| -|[#642](https://github.com/NVIDIA/spark-rapids/pull/642)|Init workflow for blossom-ci| -|[#705](https://github.com/NVIDIA/spark-rapids/pull/705)|Enable configuration check for cast string to timestamp| -|[#702](https://github.com/NVIDIA/spark-rapids/pull/702)|Update slack channel for Jenkins builds| -|[#701](https://github.com/NVIDIA/spark-rapids/pull/701)|fix checkout-ref for automerge| -|[#695](https://github.com/NVIDIA/spark-rapids/pull/695)|Fix spark-3.0.1 shim to be released| -|[#668](https://github.com/NVIDIA/spark-rapids/pull/668)|refactor automerge to support merge for protected branch| -|[#687](https://github.com/NVIDIA/spark-rapids/pull/687)|Include the UDF compiler in the dist jar| -|[#689](https://github.com/NVIDIA/spark-rapids/pull/689)|Change shims dependency to spark-3.0.1| -|[#677](https://github.com/NVIDIA/spark-rapids/pull/677)|Use multi-threaded parquet read with small files| -|[#638](https://github.com/NVIDIA/spark-rapids/pull/638)|Add Parquet-based cache serializer| -|[#613](https://github.com/NVIDIA/spark-rapids/pull/613)|Enable UCX + AQE| -|[#684](https://github.com/NVIDIA/spark-rapids/pull/684)|Enable test for literal string values in a select| -|[#686](https://github.com/NVIDIA/spark-rapids/pull/686)|Remove sorts when replacing sort aggregate if possible| -|[#675](https://github.com/NVIDIA/spark-rapids/pull/675)|Added TimeAdd| -|[#645](https://github.com/NVIDIA/spark-rapids/pull/645)|[window] Add GpuWindowExec requiredChildOrdering| -|[#676](https://github.com/NVIDIA/spark-rapids/pull/676)|fixUpJoinConsistency rule now works when AQE is enabled| -|[#683](https://github.com/NVIDIA/spark-rapids/pull/683)|Fix issues with cannonicalization of WrappedAggFunction| -|[#682](https://github.com/NVIDIA/spark-rapids/pull/682)|Fix path to start-slave.sh script in docs| -|[#673](https://github.com/NVIDIA/spark-rapids/pull/673)|Increase build timeouts on nightly and premerge builds| -|[#648](https://github.com/NVIDIA/spark-rapids/pull/648)|add signoff-check use github actions| -|[#593](https://github.com/NVIDIA/spark-rapids/pull/593)|Add support for isNaN and datetime related instructions in UDF compiler| -|[#666](https://github.com/NVIDIA/spark-rapids/pull/666)|[window] Disable GPU for COUNT(exp) queries| -|[#655](https://github.com/NVIDIA/spark-rapids/pull/655)|Implement AQE unit test for InsertAdaptiveSparkPlan| -|[#614](https://github.com/NVIDIA/spark-rapids/pull/614)|Fix for aggregation with multiple distinct and non distinct functions| -|[#657](https://github.com/NVIDIA/spark-rapids/pull/657)|Fix verify build after integration tests are run| -|[#660](https://github.com/NVIDIA/spark-rapids/pull/660)|Add in neverReplaceExec and several rules 
for it| -|[#639](https://github.com/NVIDIA/spark-rapids/pull/639)|BooleanType test shouldn't xfail| -|[#652](https://github.com/NVIDIA/spark-rapids/pull/652)|Mark UVM config as internal until supported| -|[#653](https://github.com/NVIDIA/spark-rapids/pull/653)|Move to the cudf-0.15 release| -|[#647](https://github.com/NVIDIA/spark-rapids/pull/647)|Improve warnings about AQE nodes not supported on GPU| -|[#646](https://github.com/NVIDIA/spark-rapids/pull/646)|Stop reporting zero metrics for GpuCustomShuffleReader| -|[#644](https://github.com/NVIDIA/spark-rapids/pull/644)|Small fix for race in catalog where a buffer could get spilled while …| -|[#623](https://github.com/NVIDIA/spark-rapids/pull/623)|Fix issues with canonicalization| -|[#599](https://github.com/NVIDIA/spark-rapids/pull/599)|[FEA] changelog generator| -|[#563](https://github.com/NVIDIA/spark-rapids/pull/563)|cudf and spark version info in artifacts| -|[#633](https://github.com/NVIDIA/spark-rapids/pull/633)|Fix leak if RebaseHelper throws during Parquet read| -|[#632](https://github.com/NVIDIA/spark-rapids/pull/632)|Copy function isSearchableType from Spark because signature changed in 3.0.1| -|[#583](https://github.com/NVIDIA/spark-rapids/pull/583)|Add udf compiler unit tests| -|[#617](https://github.com/NVIDIA/spark-rapids/pull/617)|Documentation updates for branch 0.2| -|[#616](https://github.com/NVIDIA/spark-rapids/pull/616)|Add config to reserve GPU memory| -|[#612](https://github.com/NVIDIA/spark-rapids/pull/612)|[REVIEW] Fix incorrect output from averages with filters in partial only mode| -|[#609](https://github.com/NVIDIA/spark-rapids/pull/609)|fix minor issues with instructions for building ucx| -|[#611](https://github.com/NVIDIA/spark-rapids/pull/611)|Added in profile to enable shims for SNAPSHOT releases| -|[#595](https://github.com/NVIDIA/spark-rapids/pull/595)|Parquet small file reading optimization| -|[#582](https://github.com/NVIDIA/spark-rapids/pull/582)|fix #579 Auto-merge between branches| -|[#536](https://github.com/NVIDIA/spark-rapids/pull/536)|Add test for skewed join optimization when AQE is enabled| -|[#603](https://github.com/NVIDIA/spark-rapids/pull/603)|Fix data size metric always 0 when using RAPIDS shuffle| -|[#600](https://github.com/NVIDIA/spark-rapids/pull/600)|Fix calculation of string data for compressed batches| -|[#597](https://github.com/NVIDIA/spark-rapids/pull/597)|Remove the xfail for parquet test_read_merge_schema on Databricks| -|[#591](https://github.com/NVIDIA/spark-rapids/pull/591)|Add ucx license in NOTICE-binary| -|[#596](https://github.com/NVIDIA/spark-rapids/pull/596)|Add Spark 3.0.2 to Shim layer| -|[#594](https://github.com/NVIDIA/spark-rapids/pull/594)|Filter nulls from joins where possible to improve performance.| -|[#590](https://github.com/NVIDIA/spark-rapids/pull/590)|Move GpuParquetScan/GpuOrcScan into Shim| -|[#588](https://github.com/NVIDIA/spark-rapids/pull/588)|xfail the tpch spark 3.1.0 tests that fail| -|[#572](https://github.com/NVIDIA/spark-rapids/pull/572)|Update buffer store to return compressed batches directly, add compression NVTX ranges| -|[#558](https://github.com/NVIDIA/spark-rapids/pull/558)|Fix unit tests when AQE is enabled| -|[#580](https://github.com/NVIDIA/spark-rapids/pull/580)|xfail the Spark 3.1.0 integration tests that fail | -|[#565](https://github.com/NVIDIA/spark-rapids/pull/565)|Minor improvements to TPC-DS benchmarking code| -|[#567](https://github.com/NVIDIA/spark-rapids/pull/567)|Explicitly disable AQE in one test| 
-|[#571](https://github.com/NVIDIA/spark-rapids/pull/571)|Fix Databricks shim layer for GpuFileSourceScanExec and GpuBroadcastExchangeExec| -|[#564](https://github.com/NVIDIA/spark-rapids/pull/564)|Add GPU decode time metric to scans| -|[#562](https://github.com/NVIDIA/spark-rapids/pull/562)|getCatalog can be called from the driver, and can return null| -|[#555](https://github.com/NVIDIA/spark-rapids/pull/555)|Fix build warnings for ColumnViewAccess| -|[#560](https://github.com/NVIDIA/spark-rapids/pull/560)|Fix databricks build for AQE support| -|[#557](https://github.com/NVIDIA/spark-rapids/pull/557)|Fix tests failing on Spark 3.1| -|[#547](https://github.com/NVIDIA/spark-rapids/pull/547)|Add GPU metrics to GpuFileSourceScanExec| -|[#462](https://github.com/NVIDIA/spark-rapids/pull/462)|Implement optimized AQE support so that exchanges run on GPU where possible| -|[#550](https://github.com/NVIDIA/spark-rapids/pull/550)|Document Parquet and ORC compression support| -|[#539](https://github.com/NVIDIA/spark-rapids/pull/539)|Update script to audit multiple Spark versions| -|[#543](https://github.com/NVIDIA/spark-rapids/pull/543)|Add metrics to GpuUnion operator| -|[#549](https://github.com/NVIDIA/spark-rapids/pull/549)|Move spark shim properties to top level pom| -|[#497](https://github.com/NVIDIA/spark-rapids/pull/497)|Add UDF compiler implementations| -|[#487](https://github.com/NVIDIA/spark-rapids/pull/487)|Add framework for batch compression of shuffle partitions| -|[#544](https://github.com/NVIDIA/spark-rapids/pull/544)|Add in driverExtraClassPath for standalone mode docs| -|[#546](https://github.com/NVIDIA/spark-rapids/pull/546)|Fix Spark 3.1.0 shim build error in GpuHashJoin| -|[#537](https://github.com/NVIDIA/spark-rapids/pull/537)|Use fresh SparkSession when capturing to avoid late capture of previous query| -|[#538](https://github.com/NVIDIA/spark-rapids/pull/538)|Revert "Temporary workaround for RMM initial pool size bug (#530)"| -|[#517](https://github.com/NVIDIA/spark-rapids/pull/517)|Add config to limit maximum RMM pool size| -|[#527](https://github.com/NVIDIA/spark-rapids/pull/527)|Add support for split and getArrayIndex| -|[#534](https://github.com/NVIDIA/spark-rapids/pull/534)|Fixes bugs around GpuShuffleEnv initialization| -|[#529](https://github.com/NVIDIA/spark-rapids/pull/529)|[BUG] Degenerate table metas were not getting copied to the heap| -|[#530](https://github.com/NVIDIA/spark-rapids/pull/530)|Temporary workaround for RMM initial pool size bug| -|[#526](https://github.com/NVIDIA/spark-rapids/pull/526)|Fix bug with nullability reporting in GpuFilterExec| -|[#521](https://github.com/NVIDIA/spark-rapids/pull/521)|Fix typo with databricks shim classname SparkShimServiceProvider| -|[#522](https://github.com/NVIDIA/spark-rapids/pull/522)|Use SQLConf instead of SparkConf when looking up SQL configs| -|[#518](https://github.com/NVIDIA/spark-rapids/pull/518)|Fix init order issue in GpuShuffleEnv when RAPIDS shuffle configured| -|[#514](https://github.com/NVIDIA/spark-rapids/pull/514)|Added clarification of RegExpReplace, DateDiff, made descriptive text consistent| -|[#506](https://github.com/NVIDIA/spark-rapids/pull/506)|Add in basic support for running tpcds like queries| -|[#504](https://github.com/NVIDIA/spark-rapids/pull/504)|Add ability to ignore tests depending on spark shim version| -|[#503](https://github.com/NVIDIA/spark-rapids/pull/503)|Remove unused async buffer spill support| -|[#501](https://github.com/NVIDIA/spark-rapids/pull/501)|disable codegen in 3.1 shim for 
hash join| -|[#466](https://github.com/NVIDIA/spark-rapids/pull/466)|Optimize and fix Api validation script| -|[#481](https://github.com/NVIDIA/spark-rapids/pull/481)|Codeowners| -|[#439](https://github.com/NVIDIA/spark-rapids/pull/439)|Check a PR has been committed using git signoff| -|[#319](https://github.com/NVIDIA/spark-rapids/pull/319)|Update partitioning logic in ShuffledBatchRDD| -|[#491](https://github.com/NVIDIA/spark-rapids/pull/491)|Temporarily ignore AQE integration tests| -|[#490](https://github.com/NVIDIA/spark-rapids/pull/490)|Fix Spark 3.1.0 build for HashJoin changes| -|[#482](https://github.com/NVIDIA/spark-rapids/pull/482)|Prevent bad practice in python tests| -|[#485](https://github.com/NVIDIA/spark-rapids/pull/485)|Show plan in assertion message if test fails| -|[#480](https://github.com/NVIDIA/spark-rapids/pull/480)|Fix link from README to getting-started.md| -|[#448](https://github.com/NVIDIA/spark-rapids/pull/448)|Preliminary support for keeping broadcast exchanges on GPU when AQE is enabled| -|[#478](https://github.com/NVIDIA/spark-rapids/pull/478)|Fall back to CPU for binary as string in parquet| -|[#477](https://github.com/NVIDIA/spark-rapids/pull/477)|Fix special case joins in broadcast nested loop join| -|[#469](https://github.com/NVIDIA/spark-rapids/pull/469)|Update HashAggregateSuite to work with AQE| -|[#475](https://github.com/NVIDIA/spark-rapids/pull/475)|Udf compiler pom followup| -|[#434](https://github.com/NVIDIA/spark-rapids/pull/434)|Add UDF compiler skeleton| -|[#474](https://github.com/NVIDIA/spark-rapids/pull/474)|Re-enable noscaladoc check| -|[#461](https://github.com/NVIDIA/spark-rapids/pull/461)|Fix comments style to pass scala style check| -|[#468](https://github.com/NVIDIA/spark-rapids/pull/468)|fix broken link| -|[#456](https://github.com/NVIDIA/spark-rapids/pull/456)|Add closeOnExcept to clean up code that closes resources only on exceptions| -|[#464](https://github.com/NVIDIA/spark-rapids/pull/464)|Turn off noscaladoc rule until codebase is fixed| -|[#449](https://github.com/NVIDIA/spark-rapids/pull/449)|Enforce NoScalaDoc rule in scalastyle checks| -|[#450](https://github.com/NVIDIA/spark-rapids/pull/450)|Enable scalastyle for shuffle plugin| -|[#451](https://github.com/NVIDIA/spark-rapids/pull/451)|Databricks remove unneeded files and fix build to not fail on rm when file missing| -|[#442](https://github.com/NVIDIA/spark-rapids/pull/442)|Shim layer support for Spark 3.0.0 Databricks| -|[#447](https://github.com/NVIDIA/spark-rapids/pull/447)|Add scalastyle plugin to shim module| -|[#426](https://github.com/NVIDIA/spark-rapids/pull/426)|Update BufferMeta to support multiple codec buffers per table| -|[#440](https://github.com/NVIDIA/spark-rapids/pull/440)|Run mortgage test both with AQE on and off| -|[#445](https://github.com/NVIDIA/spark-rapids/pull/445)|Added in StringRPad and StringLPad| -|[#422](https://github.com/NVIDIA/spark-rapids/pull/422)|Documentation updates| -|[#437](https://github.com/NVIDIA/spark-rapids/pull/437)|Fix bug with InSet and Strings| -|[#435](https://github.com/NVIDIA/spark-rapids/pull/435)|Add in checks for Parquet LEGACY date/time rebase| -|[#432](https://github.com/NVIDIA/spark-rapids/pull/432)|Fix batch use-after-close in partitioning, shuffle env init| -|[#423](https://github.com/NVIDIA/spark-rapids/pull/423)|Fix duplicates includes in assembly jar| -|[#418](https://github.com/NVIDIA/spark-rapids/pull/418)|CI Add unit tests running for Spark 3.0.1| -|[#421](https://github.com/NVIDIA/spark-rapids/pull/421)|Make 
it easier to run TPCxBB benchmarks from spark shell| -|[#413](https://github.com/NVIDIA/spark-rapids/pull/413)|Fix download link| -|[#414](https://github.com/NVIDIA/spark-rapids/pull/414)|Shim Layer to support multiple Spark versions | -|[#406](https://github.com/NVIDIA/spark-rapids/pull/406)|Update cast handling to deal with new libcudf casting limitations| -|[#405](https://github.com/NVIDIA/spark-rapids/pull/405)|Change slave->worker| -|[#395](https://github.com/NVIDIA/spark-rapids/pull/395)|Databricks doc updates| -|[#401](https://github.com/NVIDIA/spark-rapids/pull/401)|Extended the FAQ| -|[#398](https://github.com/NVIDIA/spark-rapids/pull/398)|Add tests for GpuPartition| -|[#352](https://github.com/NVIDIA/spark-rapids/pull/352)|Change spark tgz package name| -|[#397](https://github.com/NVIDIA/spark-rapids/pull/397)|Fix small bug in ShuffleBufferCatalog.hasActiveShuffle| -|[#286](https://github.com/NVIDIA/spark-rapids/pull/286)|[REVIEW] Updated join tests for cache| -|[#393](https://github.com/NVIDIA/spark-rapids/pull/393)|Contributor license agreement| -|[#389](https://github.com/NVIDIA/spark-rapids/pull/389)|Added in support for RangeExec| -|[#390](https://github.com/NVIDIA/spark-rapids/pull/390)|Ucx getting started| -|[#391](https://github.com/NVIDIA/spark-rapids/pull/391)|Hide slack channel in Jenkins scripts| -|[#387](https://github.com/NVIDIA/spark-rapids/pull/387)|Remove the term whitelist| -|[#365](https://github.com/NVIDIA/spark-rapids/pull/365)|[REVIEW] Timesub tests| -|[#383](https://github.com/NVIDIA/spark-rapids/pull/383)|Test utility to compare SQL query results between CPU and GPU| -|[#380](https://github.com/NVIDIA/spark-rapids/pull/380)|Fix databricks notebook link| -|[#378](https://github.com/NVIDIA/spark-rapids/pull/378)|Added in FAQ and fixed spelling| -|[#377](https://github.com/NVIDIA/spark-rapids/pull/377)|Update heading in configs.md| -|[#373](https://github.com/NVIDIA/spark-rapids/pull/373)|Modifying branch name to conform with rapidsai branch name change| -|[#376](https://github.com/NVIDIA/spark-rapids/pull/376)|Add our session extension correctly if there are other extensions configured| -|[#374](https://github.com/NVIDIA/spark-rapids/pull/374)|Fix rat issue for notebooks| -|[#364](https://github.com/NVIDIA/spark-rapids/pull/364)|Update Databricks patch for changes to GpuSortMergeJoin| -|[#371](https://github.com/NVIDIA/spark-rapids/pull/371)|fix typo and use regional bucket per GCP's update| -|[#359](https://github.com/NVIDIA/spark-rapids/pull/359)|Karthik changes| -|[#353](https://github.com/NVIDIA/spark-rapids/pull/353)|Fix broadcast nested loop join for the no column case| -|[#313](https://github.com/NVIDIA/spark-rapids/pull/313)|Additional tests for broadcast hash join| -|[#342](https://github.com/NVIDIA/spark-rapids/pull/342)|Implement build-side rules for shuffle hash join| -|[#349](https://github.com/NVIDIA/spark-rapids/pull/349)|Updated join code to treat null equality properly| -|[#335](https://github.com/NVIDIA/spark-rapids/pull/335)|Integration tests on spark 3.0.1-SNAPSHOT & 3.1.0-SNAPSHOT| -|[#346](https://github.com/NVIDIA/spark-rapids/pull/346)|Update the Title Header for Fine Tuning| -|[#344](https://github.com/NVIDIA/spark-rapids/pull/344)|Fix small typo in readme| -|[#331](https://github.com/NVIDIA/spark-rapids/pull/331)|Adds iterator and client unit tests, and prepares for more fetch failure handling| -|[#337](https://github.com/NVIDIA/spark-rapids/pull/337)|Fix Scala compile phase to allow Java classes referencing Scala classes| 
-|[#332](https://github.com/NVIDIA/spark-rapids/pull/332)|Match GPU overwritten functions with SQL functions from FunctionRegistry| -|[#339](https://github.com/NVIDIA/spark-rapids/pull/339)|Fix databricks build| -|[#338](https://github.com/NVIDIA/spark-rapids/pull/338)|Move GpuPartitioning to a separate file| -|[#310](https://github.com/NVIDIA/spark-rapids/pull/310)|Update release Jenkinsfile for Databricks| -|[#330](https://github.com/NVIDIA/spark-rapids/pull/330)|Hide private info in Jenkins scripts| -|[#324](https://github.com/NVIDIA/spark-rapids/pull/324)|Add in basic support for GpuCartesianProductExec| -|[#328](https://github.com/NVIDIA/spark-rapids/pull/328)|Enable slack notification for Databricks build| -|[#321](https://github.com/NVIDIA/spark-rapids/pull/321)|update databricks patch for GpuBroadcastNestedLoopJoinExec| -|[#322](https://github.com/NVIDIA/spark-rapids/pull/322)|Add oss.sonatype.org to download the cudf jar| -|[#320](https://github.com/NVIDIA/spark-rapids/pull/320)|Don't mount passwd/group to the container| -|[#258](https://github.com/NVIDIA/spark-rapids/pull/258)|Enable running TPCH tests with AQE enabled| -|[#318](https://github.com/NVIDIA/spark-rapids/pull/318)|Build docker image with Dockerfile| -|[#309](https://github.com/NVIDIA/spark-rapids/pull/309)|Update databricks patch to latest changes| -|[#312](https://github.com/NVIDIA/spark-rapids/pull/312)|Trigger branch-0.2 integration test| -|[#307](https://github.com/NVIDIA/spark-rapids/pull/307)|[Jenkins] Update the release script and Jenkinsfile| -|[#304](https://github.com/NVIDIA/spark-rapids/pull/304)|[DOC][Minor] Fix typo in spark config name.| -|[#303](https://github.com/NVIDIA/spark-rapids/pull/303)|Update compatibility doc for -0.0 issues| -|[#301](https://github.com/NVIDIA/spark-rapids/pull/301)|Add info about branches in README.md| -|[#296](https://github.com/NVIDIA/spark-rapids/pull/296)|Added in basic support for broadcast nested loop join| -|[#297](https://github.com/NVIDIA/spark-rapids/pull/297)|Databricks CI improvements and support runtime env parameter to xfail certain tests| -|[#292](https://github.com/NVIDIA/spark-rapids/pull/292)|Move artifacts version in version-def.sh| -|[#254](https://github.com/NVIDIA/spark-rapids/pull/254)|Cleanup QA tests| -|[#289](https://github.com/NVIDIA/spark-rapids/pull/289)|Clean up GpuCollectLimitMeta and add in metrics| -|[#287](https://github.com/NVIDIA/spark-rapids/pull/287)|Add in support for right join and fix issues build right| -|[#273](https://github.com/NVIDIA/spark-rapids/pull/273)|Added releases to the README.md| -|[#285](https://github.com/NVIDIA/spark-rapids/pull/285)|modify run_pyspark_from_build.sh to be bash 3 friendly| -|[#281](https://github.com/NVIDIA/spark-rapids/pull/281)|Add in support for Full Outer Join on non-null keys| -|[#274](https://github.com/NVIDIA/spark-rapids/pull/274)|Add RapidsDiskStore tests| -|[#259](https://github.com/NVIDIA/spark-rapids/pull/259)|Add RapidsHostMemoryStore tests| -|[#282](https://github.com/NVIDIA/spark-rapids/pull/282)|Update Databricks patch for 0.2 branch| -|[#261](https://github.com/NVIDIA/spark-rapids/pull/261)|Add conditional xfail test for DISTINCT aggregates with NaN| -|[#263](https://github.com/NVIDIA/spark-rapids/pull/263)|More time ops| -|[#256](https://github.com/NVIDIA/spark-rapids/pull/256)|Remove special cases for contains, startsWith, and endWith| -|[#253](https://github.com/NVIDIA/spark-rapids/pull/253)|Remove GpuAttributeReference and GpuSortOrder| 
-|[#271](https://github.com/NVIDIA/spark-rapids/pull/271)|Update the versions for 0.2.0 properly for the databricks build| -|[#162](https://github.com/NVIDIA/spark-rapids/pull/162)|Integration tests for corner cases in window functions.| -|[#264](https://github.com/NVIDIA/spark-rapids/pull/264)|Add a local mvn repo for nightly pipeline| -|[#262](https://github.com/NVIDIA/spark-rapids/pull/262)|Refer to branch-0.2| -|[#255](https://github.com/NVIDIA/spark-rapids/pull/255)|Revert change to make dependencies of shaded jar optional| -|[#257](https://github.com/NVIDIA/spark-rapids/pull/257)|Fix link to RAPIDS cudf in index.md| -|[#252](https://github.com/NVIDIA/spark-rapids/pull/252)|Update to 0.2.0-SNAPSHOT and cudf-0.15-SNAPSHOT| - -## Release 0.1 - -### Features -||| -|:---|:---| -|[#74](https://github.com/NVIDIA/spark-rapids/issues/74)|[FEA] Support ToUnixTimestamp| -|[#21](https://github.com/NVIDIA/spark-rapids/issues/21)|[FEA] NormalizeNansAndZeros| -|[#105](https://github.com/NVIDIA/spark-rapids/issues/105)|[FEA] integration tests for equi-joins| - -### Bugs Fixed -||| -|:---|:---| -|[#116](https://github.com/NVIDIA/spark-rapids/issues/116)|[BUG] calling replace with a NULL throws an exception| -|[#168](https://github.com/NVIDIA/spark-rapids/issues/168)|[BUG] GpuUnitTests Date tests leak column vectors| -|[#209](https://github.com/NVIDIA/spark-rapids/issues/209)|[BUG] Developers section in pom need to be updated| -|[#204](https://github.com/NVIDIA/spark-rapids/issues/204)|[BUG] Code coverage docs are out of date| -|[#154](https://github.com/NVIDIA/spark-rapids/issues/154)|[BUG] Incorrect output from partial-only averages with nulls| -|[#61](https://github.com/NVIDIA/spark-rapids/issues/61)|[BUG] Cannot disable Parquet, ORC, CSV reading when using FileSourceScanExec| - -### PRs -||| -|:---|:---| -|[#249](https://github.com/NVIDIA/spark-rapids/pull/249)|Compatability -> Compatibility| -|[#247](https://github.com/NVIDIA/spark-rapids/pull/247)|Add index.md for default doc page, fix table formatting for configs| -|[#241](https://github.com/NVIDIA/spark-rapids/pull/241)|Let default branch to master per the release rule| -|[#177](https://github.com/NVIDIA/spark-rapids/pull/177)|Fixed leaks in unit test and use ColumnarBatch for testing| -|[#243](https://github.com/NVIDIA/spark-rapids/pull/243)|Jenkins file for Databricks release| -|[#225](https://github.com/NVIDIA/spark-rapids/pull/225)|Make internal project dependencies optional for shaded artifact| -|[#242](https://github.com/NVIDIA/spark-rapids/pull/242)|Add site pages| -|[#221](https://github.com/NVIDIA/spark-rapids/pull/221)|Databricks Build Support| -|[#215](https://github.com/NVIDIA/spark-rapids/pull/215)|Remove CudfColumnVector| -|[#213](https://github.com/NVIDIA/spark-rapids/pull/213)|Add RapidsDeviceMemoryStore tests| -|[#214](https://github.com/NVIDIA/spark-rapids/pull/214)|[REVIEW] Test failure to pass Attribute as GpuAttribute| -|[#211](https://github.com/NVIDIA/spark-rapids/pull/211)|Add project leads to pom developer list| -|[#210](https://github.com/NVIDIA/spark-rapids/pull/210)|Updated coverage docs| -|[#195](https://github.com/NVIDIA/spark-rapids/pull/195)|Support public release for plugin jar| -|[#208](https://github.com/NVIDIA/spark-rapids/pull/208)|Remove unneeded comment from pom.xml| -|[#191](https://github.com/NVIDIA/spark-rapids/pull/191)|WindowExec handle different spark distributions| -|[#181](https://github.com/NVIDIA/spark-rapids/pull/181)|Remove INCOMPAT for NormalizeNanAndZero, KnownFloatingPointNormalized| 
-|[#196](https://github.com/NVIDIA/spark-rapids/pull/196)|Update Spark dependency to the released 3.0.0 artifacts| -|[#206](https://github.com/NVIDIA/spark-rapids/pull/206)|Change groupID to 'com.nvidia' in IT scripts| -|[#202](https://github.com/NVIDIA/spark-rapids/pull/202)|Fixed issue for contains when searching for an empty string| -|[#201](https://github.com/NVIDIA/spark-rapids/pull/201)|Fix name of scan| -|[#200](https://github.com/NVIDIA/spark-rapids/pull/200)|Fix issue with GpuAttributeReference not overrideing references| -|[#197](https://github.com/NVIDIA/spark-rapids/pull/197)|Fix metrics for writes| -|[#186](https://github.com/NVIDIA/spark-rapids/pull/186)|Fixed issue with nullability on concat| -|[#193](https://github.com/NVIDIA/spark-rapids/pull/193)|Add RapidsBufferCatalog tests| -|[#188](https://github.com/NVIDIA/spark-rapids/pull/188)|rebrand to com.nvidia instead of ai.rapids| -|[#189](https://github.com/NVIDIA/spark-rapids/pull/189)|Handle AggregateExpression having resultIds parameter instead of a single resultId| -|[#190](https://github.com/NVIDIA/spark-rapids/pull/190)|FileSourceScanExec can have logicalRelation parameter on some distributions| -|[#185](https://github.com/NVIDIA/spark-rapids/pull/185)|Update type of parameter of GpuExpandExec to make it consistent| -|[#172](https://github.com/NVIDIA/spark-rapids/pull/172)|Merge qa test to integration test| -|[#180](https://github.com/NVIDIA/spark-rapids/pull/180)|Add MetaUtils unit tests| -|[#171](https://github.com/NVIDIA/spark-rapids/pull/171)|Cleanup scaladoc warnings about missing links| -|[#176](https://github.com/NVIDIA/spark-rapids/pull/176)|Updated join tests to cover more data.| -|[#169](https://github.com/NVIDIA/spark-rapids/pull/169)|Remove dependency on shaded Spark artifact| -|[#174](https://github.com/NVIDIA/spark-rapids/pull/174)|Added in fallback tests| -|[#165](https://github.com/NVIDIA/spark-rapids/pull/165)|Move input metadata tests to pyspark| -|[#173](https://github.com/NVIDIA/spark-rapids/pull/173)|Fix setting local mode for tests| -|[#160](https://github.com/NVIDIA/spark-rapids/pull/160)|Integration tests for normalizing NaN/zeroes.| -|[#163](https://github.com/NVIDIA/spark-rapids/pull/163)|Ignore the order locally for repartition tests| -|[#157](https://github.com/NVIDIA/spark-rapids/pull/157)|Add partial and final only hash aggregate tests and fix nulls corner case for Average| -|[#159](https://github.com/NVIDIA/spark-rapids/pull/159)|Add integration tests for joins| -|[#158](https://github.com/NVIDIA/spark-rapids/pull/158)|Orc merge schema fallback and FileScan format configs| -|[#164](https://github.com/NVIDIA/spark-rapids/pull/164)|Fix compiler warnings| -|[#152](https://github.com/NVIDIA/spark-rapids/pull/152)|Moved cudf to 0.14 for CI| -|[#151](https://github.com/NVIDIA/spark-rapids/pull/151)|Switch CICD pipelines to Github| +## Older Releases +Changelog of older releases can be found at [docs/archives](/docs/archives) diff --git a/NOTICE-binary b/NOTICE-binary index 8066d8545c3..0c0021116d0 100644 --- a/NOTICE-binary +++ b/NOTICE-binary @@ -12,17 +12,6 @@ Copyright 2014 and onwards The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). ---------------------------------------------------------------------- - -Apache ORC -Copyright 2013-2019 The Apache Software Foundation - -This product includes software developed by The Apache Software -Foundation (http://www.apache.org/). 
- -This product includes software developed by Hewlett-Packard: -(c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P - --------------------------------------------------------------------- UCF Consortium - Unified Communication X (UCX) diff --git a/aggregator/pom.xml b/aggregator/pom.xml index 4ea80017800..b4663bafa2c 100644 --- a/aggregator/pom.xml +++ b/aggregator/pom.xml @@ -88,53 +88,8 @@ - org.apache.orc. - ${rapids.shade.package}.orc. - - - org.apache.hadoop.hive. - ${rapids.shade.package}.hadoop.hive. - - - org.apache.hadoop.hive.conf.HiveConf - org.apache.hadoop.hive.ql.exec.FunctionRegistry - org.apache.hadoop.hive.ql.exec.UDF - org.apache.hadoop.hive.ql.exec.UDFMethodResolver - org.apache.hadoop.hive.ql.udf.UDFType - org.apache.hadoop.hive.ql.udf.generic.GenericUDF - org.apache.hadoop.hive.ql.udf.generic.GenericUDF$DeferredObject - org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils$ConversionHelper - org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector - org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory - org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory$ObjectInspectorOptions - org.apache.hadoop.hive.serde2.objectinspector.StructField - org.apache.hadoop.hive.serde2.typeinfo.TypeInfo - - - - org.apache.hive. - ${rapids.shade.package}.hive. - - - io.airlift.compress. - ${rapids.shade.package}.io.airlift.compress. - - - org.apache.commons.codec. - ${rapids.shade.package}.org.apache.commons.codec. - - - org.apache.commons.lang. - ${rapids.shade.package}.org.apache.commons.lang. - - - com.google - ${rapids.shade.package}.com.google + com.google.flatbuffers + ${rapids.shade.package}.com.google.flatbuffers diff --git a/common/pom.xml b/common/pom.xml new file mode 100644 index 00000000000..3f46ea8459f --- /dev/null +++ b/common/pom.xml @@ -0,0 +1,96 @@ + + + + + + 4.0.0 + + + com.nvidia + rapids-4-spark-parent + 22.04.0-SNAPSHOT + + + rapids-4-spark-common_2.12 + RAPIDS Accelerator for Apache Spark Common + Utility code that is common across the RAPIDS Accelerator projects + 22.04.0-SNAPSHOT + + + + org.scala-lang + scala-library + + + org.scalatest + scalatest_${scala.binary.version} + test + + + + + + + + ${project.build.directory}/extra-resources + true + + + ${project.basedir}/.. + META-INF + + + LICENSE + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + default-test-jar + none + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + true + + + + net.alchim31.maven + scala-maven-plugin + + + org.apache.rat + apache-rat-plugin + + + org.scalatest + scalatest-maven-plugin + + + + diff --git a/common/src/main/scala/com/nvidia/spark/rapids/CheckUtils.scala b/common/src/main/scala/com/nvidia/spark/rapids/CheckUtils.scala new file mode 100644 index 00000000000..65ab724cc50 --- /dev/null +++ b/common/src/main/scala/com/nvidia/spark/rapids/CheckUtils.scala @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids + +object CheckUtils { + def checkArgument(expression: Boolean, msg: String): Unit = { + if (!expression) throw new IllegalArgumentException(msg) + } +} diff --git a/common/src/main/scala/com/nvidia/spark/rapids/ThreadFactoryBuilder.scala b/common/src/main/scala/com/nvidia/spark/rapids/ThreadFactoryBuilder.scala new file mode 100644 index 00000000000..d61dd5a9c90 --- /dev/null +++ b/common/src/main/scala/com/nvidia/spark/rapids/ThreadFactoryBuilder.scala @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids + +import java.util.concurrent.{Executors, ThreadFactory} +import java.util.concurrent.atomic.AtomicLong + +// This is similar to Guava ThreadFactoryBuilder +// Avoid to use Guava as it is a messy dependency in practice. +class ThreadFactoryBuilder { + private var nameFormat = Option.empty[String] + private var daemon = Option.empty[Boolean] + + def setNameFormat(nameFormat: String): ThreadFactoryBuilder = { + nameFormat.format(0) + this.nameFormat = Some(nameFormat) + this + } + + def setDaemon(daemon: Boolean): ThreadFactoryBuilder = { + this.daemon = Some(daemon) + this + } + + def build(): ThreadFactory = { + val count = nameFormat.map(_ => new AtomicLong(0)) + new ThreadFactory() { + private val defaultThreadFactory = Executors.defaultThreadFactory + + override def newThread(r: Runnable): Thread = { + val thread = defaultThreadFactory.newThread(r) + nameFormat.foreach(f => thread.setName(f.format(count.get.getAndIncrement()))) + daemon.foreach(b => thread.setDaemon(b)) + thread + } + } + } +} diff --git a/common/src/test/scala/com/nvidia/spark/rapids/ThreadFactoryBuilderTest.scala b/common/src/test/scala/com/nvidia/spark/rapids/ThreadFactoryBuilderTest.scala new file mode 100644 index 00000000000..d71915f51d0 --- /dev/null +++ b/common/src/test/scala/com/nvidia/spark/rapids/ThreadFactoryBuilderTest.scala @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package com.nvidia.spark.rapids
+
+import java.util.concurrent.{Callable, Executors}
+
+import org.scalatest.FunSuite
+
+class ThreadFactoryBuilderTest extends FunSuite {
+
+  test("test thread factory builder") {
+    val pool1 = Executors.newFixedThreadPool(2,
+      new ThreadFactoryBuilder().setNameFormat("thread-pool1-1 %s").setDaemon(true).build())
+    try {
+      var ret = pool1.submit(new Callable[String] {
+        override def call(): String = {
+          assert(Thread.currentThread().isDaemon)
+          assert(Thread.currentThread().getName == "thread-pool1-1 0")
+          ""
+        }
+      })
+      // Wait for and retrieve the result; if the asserts above failed, get() throws an ExecutionException.
+      ret.get()
+      ret = pool1.submit(() => {
+        assert(Thread.currentThread().isDaemon)
+        assert(Thread.currentThread().getName == "thread-pool1-1 1")
+        ""
+      })
+      ret.get()
+    } finally {
+      pool1.shutdown()
+    }
+
+    val pool2 = Executors.newFixedThreadPool(2,
+      new ThreadFactoryBuilder().setNameFormat("pool2-%d").build())
+    try {
+      var ret = pool2.submit(new Callable[String] {
+        override def call(): String = {
+          assert(!Thread.currentThread().isDaemon)
+          assert(Thread.currentThread().getName == "pool2-0")
+          ""
+        }
+      })
+      ret.get()
+      ret = pool2.submit(() => {
+        assert(!Thread.currentThread().isDaemon)
+        assert(Thread.currentThread().getName == "pool2-1")
+        ""
+      })
+      ret.get()
+    } finally {
+      pool2.shutdown()
+    }
+
+    val pool3 = Executors.newFixedThreadPool(2,
+      new ThreadFactoryBuilder().setNameFormat("pool3-%d").setDaemon(false).build())
+    try {
+      pool3.submit(new Callable[String] {
+        override def call(): String = {
+          assert(!Thread.currentThread().isDaemon)
+          assert(Thread.currentThread().getName == "pool3-0")
+          ""
+        }
+      }).get()
+    } finally {
+      pool3.shutdown()
+    }
+  }
+}
diff --git a/docs/archives/CHANGELOG_0.1_to_0.5.md b/docs/archives/CHANGELOG_0.1_to_0.5.md
new file mode 100644
index 00000000000..fa5412f7d8a
--- /dev/null
+++ b/docs/archives/CHANGELOG_0.1_to_0.5.md
@@ -0,0 +1,1325 @@
+# Change log
+Generated on 2022-01-28
+
+## Release 0.5
+
+### Features
+|||
+|:---|:---|
+|[#938](https://github.com/NVIDIA/spark-rapids/issues/938)|[FEA] Have hashed shuffle match spark|
+|[#1604](https://github.com/NVIDIA/spark-rapids/issues/1604)|[FEA] Support casting structs to strings |
+|[#1920](https://github.com/NVIDIA/spark-rapids/issues/1920)|[FEA] Support murmur3 hashing of structs|
+|[#2018](https://github.com/NVIDIA/spark-rapids/issues/2018)|[FEA] A way for user to find out the plugin version and cudf version in REPL|
+|[#77](https://github.com/NVIDIA/spark-rapids/issues/77)|[FEA] Support ArrayContains|
+|[#1721](https://github.com/NVIDIA/spark-rapids/issues/1721)|[FEA] build cudf jars with NVTX enabled|
+|[#1782](https://github.com/NVIDIA/spark-rapids/issues/1782)|[FEA] Shim layers to support spark versions|
+|[#1625](https://github.com/NVIDIA/spark-rapids/issues/1625)|[FEA] Support Decimal Casts to String and String to Decimal|
+|[#166](https://github.com/NVIDIA/spark-rapids/issues/166)|[FEA] Support get_json_object|
+|[#1698](https://github.com/NVIDIA/spark-rapids/issues/1698)|[FEA] Support casting structs to string|
+|[#1912](https://github.com/NVIDIA/spark-rapids/issues/1912)|[FEA] Let `Scalar Pandas UDF ` support array of struct type.|
+|[#1136](https://github.com/NVIDIA/spark-rapids/issues/1136)|[FEA] Audit: Script to list commits between different Spark versions/tags|
+|[#1921](https://github.com/NVIDIA/spark-rapids/issues/1921)|[FEA] cudf version check should be lenient on later patch version|
+|[#19](https://github.com/NVIDIA/spark-rapids/issues/19)|[FEA] Out of core sorts| + +### Performance +||| +|:---|:---| +|[#2090](https://github.com/NVIDIA/spark-rapids/issues/2090)|[FEA] Make row count estimates available to the cost-based optimizer| +|[#1341](https://github.com/NVIDIA/spark-rapids/issues/1341)|Optimize unnecessary columnar->row->columnar transitions with AQE| +|[#1558](https://github.com/NVIDIA/spark-rapids/issues/1558)|[FEA] Initialize UCX early| +|[#1633](https://github.com/NVIDIA/spark-rapids/issues/1633)|[FEA] Implement a cost-based optimizer| +|[#1727](https://github.com/NVIDIA/spark-rapids/issues/1727)|[FEA] Put RangePartitioner data path on the GPU| + +### Bugs Fixed +||| +|:---|:---| +|[#2279](https://github.com/NVIDIA/spark-rapids/issues/2279)|[BUG] Hash Partitioning can fail for very small batches| +|[#2314](https://github.com/NVIDIA/spark-rapids/issues/2314)|[BUG] v0.5.0 pre-release pytests join_test.py::test_hash_join_array FAILED on SPARK-EGX Yarn Cluster| +|[#2317](https://github.com/NVIDIA/spark-rapids/issues/2317)|[BUG] GpuColumnarToRowIterator can stop after receiving an empty batch| +|[#2244](https://github.com/NVIDIA/spark-rapids/issues/2244)|[BUG] Executors hanging when running NDS benchmarks| +|[#2278](https://github.com/NVIDIA/spark-rapids/issues/2278)|[BUG] FullOuter join can produce too many results| +|[#2220](https://github.com/NVIDIA/spark-rapids/issues/2220)|[BUG] csv_test.py::test_csv_fallback FAILED on the EMR Cluster| +|[#2225](https://github.com/NVIDIA/spark-rapids/issues/2225)|[BUG] GpuSort fails on tables containing arrays.| +|[#2232](https://github.com/NVIDIA/spark-rapids/issues/2232)|[BUG] hash_aggregate_test.py::test_hash_grpby_pivot FAILED on the Databricks Cluster| +|[#2231](https://github.com/NVIDIA/spark-rapids/issues/2231)|[BUG]string_test.py::test_re_replace FAILED on the Dataproc Cluster| +|[#2042](https://github.com/NVIDIA/spark-rapids/issues/2042)|[BUG] NDS q14a fails with "GpuColumnarToRow does not implement doExecuteBroadcast"| +|[#2203](https://github.com/NVIDIA/spark-rapids/issues/2203)|[BUG] Spark nightly cache tests fail with -- master flag| +|[#2230](https://github.com/NVIDIA/spark-rapids/issues/2230)|[BUG] qa_nightly_select_test.py::test_select FAILED on the Dataproc Cluster| +|[#1711](https://github.com/NVIDIA/spark-rapids/issues/1711)|[BUG] find a way to stop allocating from RMM on the shuffle-client thread| +|[#2109](https://github.com/NVIDIA/spark-rapids/issues/2109)|[BUG] Fix high priority violations detected by code analysis tools| +|[#2217](https://github.com/NVIDIA/spark-rapids/issues/2217)|[BUG] qa_nightly_select_test failure in test_select | +|[#2127](https://github.com/NVIDIA/spark-rapids/issues/2127)|[BUG] Parsing with two-digit year should fall back to CPU| +|[#2078](https://github.com/NVIDIA/spark-rapids/issues/2078)|[BUG] java.lang.ArithmeticException: divide by zero when spark.sql.ansi.enabled=true| +|[#2048](https://github.com/NVIDIA/spark-rapids/issues/2048)|[BUG] split function+ repartition result in "ai.rapids.cudf.CudaException: device-side assert triggered"| +|[#2036](https://github.com/NVIDIA/spark-rapids/issues/2036)|[BUG] Stackoverflow when writing wide parquet files.| +|[#1973](https://github.com/NVIDIA/spark-rapids/issues/1973)|[BUG] generate_expr_test FAILED on Dataproc Cluster| +|[#2079](https://github.com/NVIDIA/spark-rapids/issues/2079)|[BUG] koalas.sql fails with java.lang.ArrayIndexOutOfBoundsException| +|[#217](https://github.com/NVIDIA/spark-rapids/issues/217)|[BUG] CudaUtil should be 
removed| +|[#1550](https://github.com/NVIDIA/spark-rapids/issues/1550)|[BUG] The ORC output data of a query is not readable| +|[#2074](https://github.com/NVIDIA/spark-rapids/issues/2074)|[BUG] Intermittent NPE in RapidsBufferCatalog when running test suite| +|[#2027](https://github.com/NVIDIA/spark-rapids/issues/2027)|[BUG] udf_cudf_test.py integration tests fail | +|[#1899](https://github.com/NVIDIA/spark-rapids/issues/1899)|[BUG] Some queries fail when cost-based optimizations are enabled| +|[#1914](https://github.com/NVIDIA/spark-rapids/issues/1914)|[BUG] Add in float, double, timestamp, and date support to murmur3| +|[#2014](https://github.com/NVIDIA/spark-rapids/issues/2014)|[BUG] earlyStart option added in 0.5 can cause errors when starting UCX| +|[#1984](https://github.com/NVIDIA/spark-rapids/issues/1984)|[BUG] NDS q58 Decimal scale (59) cannot be greater than precision (38).| +|[#2001](https://github.com/NVIDIA/spark-rapids/issues/2001)|[BUG] RapidsShuffleManager didn't pass `dirs` to `getBlockData` from a wrapped `ShuffleBlockResolver`| +|[#1797](https://github.com/NVIDIA/spark-rapids/issues/1797)|[BUG] occasional crashes in CI| +|[#1861](https://github.com/NVIDIA/spark-rapids/issues/1861)|Encountered column data outside the range of input buffer| +|[#1905](https://github.com/NVIDIA/spark-rapids/issues/1905)|[BUG] Large concat task time in GpuShuffleCoalesce with pinned memory pool| +|[#1638](https://github.com/NVIDIA/spark-rapids/issues/1638)|[BUG] Tests `test_window_aggs_for_rows_collect_list` fails when there are null values in columns.| +|[#1864](https://github.com/NVIDIA/spark-rapids/issues/1864)|[BUG]HostColumnarToGPU inefficient when only doing count()| +|[#1862](https://github.com/NVIDIA/spark-rapids/issues/1862)|[BUG] spark 3.2.0-snapshot integration test failed due to conf change| +|[#1844](https://github.com/NVIDIA/spark-rapids/issues/1844)|[BUG] branch-0.5 nightly IT FAILED on the The mortgage ETL test "Could not read footer for file: file:/xxx/xxx.snappy.parquet"| +|[#1627](https://github.com/NVIDIA/spark-rapids/issues/1627)|[BUG] GDS exception when restoring spilled buffer| +|[#1802](https://github.com/NVIDIA/spark-rapids/issues/1802)|[BUG] Many decimal integration test failures for 0.5| + +### PRs +||| +|:---|:---| +|[#2326](https://github.com/NVIDIA/spark-rapids/pull/2326)|Update changelog for 0.5.0 release| +|[#2316](https://github.com/NVIDIA/spark-rapids/pull/2316)|Update doc to note that single quoted json strings are not ok| +|[#2319](https://github.com/NVIDIA/spark-rapids/pull/2319)|Disable hash partitioning on arrays| +|[#2318](https://github.com/NVIDIA/spark-rapids/pull/2318)|Fix ColumnarToRowIterator handling of empty batches| +|[#2304](https://github.com/NVIDIA/spark-rapids/pull/2304)|Update CHANGELOG.md| +|[#2301](https://github.com/NVIDIA/spark-rapids/pull/2301)|Update doc to reflect nanosleep problem with 460.32.03| +|[#2298](https://github.com/NVIDIA/spark-rapids/pull/2298)|Update changelog for v0.5.0 release [skip ci]| +|[#2293](https://github.com/NVIDIA/spark-rapids/pull/2293)|update cudf version to 0.19.2| +|[#2289](https://github.com/NVIDIA/spark-rapids/pull/2289)|Update docs to warn against 450.80.02 driver with 10.x toolkit| +|[#2285](https://github.com/NVIDIA/spark-rapids/pull/2285)|Require single batch for full outer join streaming| +|[#2281](https://github.com/NVIDIA/spark-rapids/pull/2281)|Remove download section for unreleased 0.4.2| +|[#2264](https://github.com/NVIDIA/spark-rapids/pull/2264)|Add spark312 and spark320 versions of cache 
serializer| +|[#2254](https://github.com/NVIDIA/spark-rapids/pull/2254)|updated gcp docs with custom dataproc image instructions| +|[#2247](https://github.com/NVIDIA/spark-rapids/pull/2247)|Allow specifying a superclass for non-GPU execs| +|[#2235](https://github.com/NVIDIA/spark-rapids/pull/2235)|Fix distributed cache to read requested schema | +|[#2261](https://github.com/NVIDIA/spark-rapids/pull/2261)|Make CBO row count test more robust| +|[#2237](https://github.com/NVIDIA/spark-rapids/pull/2237)|update cudf version to 0.19.1| +|[#2240](https://github.com/NVIDIA/spark-rapids/pull/2240)|Get the correct 'PIPESTATUS' in bash [skip ci]| +|[#2242](https://github.com/NVIDIA/spark-rapids/pull/2242)|Add shuffle doc section on the periodicGC configuration| +|[#2251](https://github.com/NVIDIA/spark-rapids/pull/2251)|Fix issue when out of core sorting nested data types| +|[#2204](https://github.com/NVIDIA/spark-rapids/pull/2204)|Run nightly tests for ParquetCachedBatchSerializer| +|[#2245](https://github.com/NVIDIA/spark-rapids/pull/2245)|Fix pivot bug for decimalType| +|[#2093](https://github.com/NVIDIA/spark-rapids/pull/2093)|Initial implementation of row count estimates in cost-based optimizer| +|[#2188](https://github.com/NVIDIA/spark-rapids/pull/2188)|Support GPU broadcast exchange reuse to feed CPU BHJ when AQE is enabled| +|[#2227](https://github.com/NVIDIA/spark-rapids/pull/2227)|ParquetCachedBatchSerializer broadcast AllConfs instead of SQLConf to fix distributed mode| +|[#2223](https://github.com/NVIDIA/spark-rapids/pull/2223)|Adds subquery aggregate tests from SPARK-31620| +|[#2222](https://github.com/NVIDIA/spark-rapids/pull/2222)|Remove groupId already specified in parent pom| +|[#2209](https://github.com/NVIDIA/spark-rapids/pull/2209)|Fixed a few issues with out of core sort| +|[#2218](https://github.com/NVIDIA/spark-rapids/pull/2218)|Fix incorrect RegExpReplace children handling on Spark 3.1+| +|[#2207](https://github.com/NVIDIA/spark-rapids/pull/2207)|fix batch size default values in the tuning guide| +|[#2208](https://github.com/NVIDIA/spark-rapids/pull/2208)|Revert "add nightly cache tests (#2083)"| +|[#2206](https://github.com/NVIDIA/spark-rapids/pull/2206)|Fix shim301db build| +|[#2192](https://github.com/NVIDIA/spark-rapids/pull/2192)|Fix index-based access to the head elements| +|[#2210](https://github.com/NVIDIA/spark-rapids/pull/2210)|Avoid redundant collection conversions| +|[#2190](https://github.com/NVIDIA/spark-rapids/pull/2190)|JNI fixes for StringWordCount native UDF example| +|[#2086](https://github.com/NVIDIA/spark-rapids/pull/2086)|Updating documentation for data format support| +|[#2172](https://github.com/NVIDIA/spark-rapids/pull/2172)|Remove easy unused symbols| +|[#2089](https://github.com/NVIDIA/spark-rapids/pull/2089)|Update PandasUDF doc| +|[#2195](https://github.com/NVIDIA/spark-rapids/pull/2195)|fix cudf 0.19.0 download link [skip ci]| +|[#2175](https://github.com/NVIDIA/spark-rapids/pull/2175)|Branch 0.5 doc update| +|[#2168](https://github.com/NVIDIA/spark-rapids/pull/2168)|Simplify GpuExpressions w/ withResourceIfAllowed| +|[#2055](https://github.com/NVIDIA/spark-rapids/pull/2055)|Support PivotFirst| +|[#2183](https://github.com/NVIDIA/spark-rapids/pull/2183)|GpuParquetScan#readBufferToTable remove dead code| +|[#2129](https://github.com/NVIDIA/spark-rapids/pull/2129)|Fall back to CPU when parsing two-digit years| +|[#2083](https://github.com/NVIDIA/spark-rapids/pull/2083)|add nightly cache tests| 
+|[#2151](https://github.com/NVIDIA/spark-rapids/pull/2151)|add corresponding close call for HostMemoryOutputStream| +|[#2169](https://github.com/NVIDIA/spark-rapids/pull/2169)|Work around bug in Spark for integration test| +|[#2130](https://github.com/NVIDIA/spark-rapids/pull/2130)|Fix divide-by-zero in GpuAverage with ansi mode| +|[#2149](https://github.com/NVIDIA/spark-rapids/pull/2149)|Auto generate the supported types for the file formats| +|[#2072](https://github.com/NVIDIA/spark-rapids/pull/2072)|Disable CSV parsing by default and update tests to better show what is left| +|[#2157](https://github.com/NVIDIA/spark-rapids/pull/2157)|fix merge conflict for 0.4.2 [skip ci]| +|[#2144](https://github.com/NVIDIA/spark-rapids/pull/2144)|Allow array and struct types to pass thru when doing join| +|[#2145](https://github.com/NVIDIA/spark-rapids/pull/2145)|Avoid GPU shuffle for round-robin of unsortable types| +|[#2021](https://github.com/NVIDIA/spark-rapids/pull/2021)|Add in support for murmur3 hashing of structs| +|[#2128](https://github.com/NVIDIA/spark-rapids/pull/2128)|Add in Partition type check support| +|[#2116](https://github.com/NVIDIA/spark-rapids/pull/2116)|Add dynamic Spark configuration for Databricks| +|[#2132](https://github.com/NVIDIA/spark-rapids/pull/2132)|Log plugin and cudf versions on startup| +|[#2135](https://github.com/NVIDIA/spark-rapids/pull/2135)|Disable Spark 3.2 shim by default| +|[#2125](https://github.com/NVIDIA/spark-rapids/pull/2125)|enable auto-merge from 0.5 to 0.6 [skip ci]| +|[#2120](https://github.com/NVIDIA/spark-rapids/pull/2120)|Materialize Stream before serialization| +|[#2119](https://github.com/NVIDIA/spark-rapids/pull/2119)|Add more comprehensive documentation on supported date formats| +|[#1717](https://github.com/NVIDIA/spark-rapids/pull/1717)|Decimal32 support| +|[#2114](https://github.com/NVIDIA/spark-rapids/pull/2114)|Modified the Download page for 0.4.1 and updated doc to point to K8s guide| +|[#2106](https://github.com/NVIDIA/spark-rapids/pull/2106)|Fix some buffer leaks| +|[#2097](https://github.com/NVIDIA/spark-rapids/pull/2097)|fix the bound row project empty issue in row frame| +|[#2099](https://github.com/NVIDIA/spark-rapids/pull/2099)|Remove verbose log prints to make the build/test log clean| +|[#2105](https://github.com/NVIDIA/spark-rapids/pull/2105)|Cleanup prior Spark sessions in tests consistently| +|[#2104](https://github.com/NVIDIA/spark-rapids/pull/2104)| Clone apache spark source code to parse the git commit IDs| +|[#2095](https://github.com/NVIDIA/spark-rapids/pull/2095)|fix refcount when materializing device buffer from GDS| +|[#2100](https://github.com/NVIDIA/spark-rapids/pull/2100)|[BUG] add wget for fetching conda [skip ci]| +|[#2096](https://github.com/NVIDIA/spark-rapids/pull/2096)|Adjust images for integration tests| +|[#2094](https://github.com/NVIDIA/spark-rapids/pull/2094)|Changed name of parquet files for Mortgage ETL Integration test| +|[#2035](https://github.com/NVIDIA/spark-rapids/pull/2035)|Accelerate data transfer for map Pandas UDF plan| +|[#2050](https://github.com/NVIDIA/spark-rapids/pull/2050)|stream shuffle buffers from GDS to UCX| +|[#2084](https://github.com/NVIDIA/spark-rapids/pull/2084)|Enable ORC write by default| +|[#2088](https://github.com/NVIDIA/spark-rapids/pull/2088)|Upgrade ScalaTest plugin to respect JAVA_HOME| +|[#1932](https://github.com/NVIDIA/spark-rapids/pull/1932)|Create a getting started on K8s page| +|[#2080](https://github.com/NVIDIA/spark-rapids/pull/2080)|Improve error message 
after failed RMM shutdown| +|[#2064](https://github.com/NVIDIA/spark-rapids/pull/2064)|Optimize unnecessary columnar->row->columnar transitions with AQE| +|[#2025](https://github.com/NVIDIA/spark-rapids/pull/2025)|Update the doc for pandas udf on databricks| +|[#2059](https://github.com/NVIDIA/spark-rapids/pull/2059)|Add the flag 'TEST_TYPE' to avoid integration tests silently skipping some test cases| +|[#2075](https://github.com/NVIDIA/spark-rapids/pull/2075)|Remove debug println from CBO test| +|[#2046](https://github.com/NVIDIA/spark-rapids/pull/2046)|support casting Decimal to String| +|[#1812](https://github.com/NVIDIA/spark-rapids/pull/1812)|allow spilled buffers to be unspilled| +|[#2061](https://github.com/NVIDIA/spark-rapids/pull/2061)|Run the pandas udf using cudf on Databricks| +|[#1893](https://github.com/NVIDIA/spark-rapids/pull/1893)|Plug-in support for get_json_object| +|[#2044](https://github.com/NVIDIA/spark-rapids/pull/2044)|Use partition for GPU hash partitioning| +|[#1954](https://github.com/NVIDIA/spark-rapids/pull/1954)|Fix CBO bug where incompatible plans were produced with AQE on| +|[#2049](https://github.com/NVIDIA/spark-rapids/pull/2049)|Remove incompatable int overflow checking| +|[#2056](https://github.com/NVIDIA/spark-rapids/pull/2056)|Remove Spark 3.2 from premerge and nightly CI run| +|[#1814](https://github.com/NVIDIA/spark-rapids/pull/1814)|Struct to string casting functionality| +|[#2037](https://github.com/NVIDIA/spark-rapids/pull/2037)|Fix warnings from use of deprecated cudf methods| +|[#2033](https://github.com/NVIDIA/spark-rapids/pull/2033)|Bump up pre-merge OS from ubuntu 16 to ubuntu 18 [skip ci]| +|[#1883](https://github.com/NVIDIA/spark-rapids/pull/1883)|Enable sort for single-level nesting struct columns on GPU| +|[#2016](https://github.com/NVIDIA/spark-rapids/pull/2016)|Refactor logic for parallel testing| +|[#2022](https://github.com/NVIDIA/spark-rapids/pull/2022)|Update order by to not load native libraries when sorting| +|[#2017](https://github.com/NVIDIA/spark-rapids/pull/2017)|Add in murmur3 support for float, double, date and timestamp| +|[#1981](https://github.com/NVIDIA/spark-rapids/pull/1981)|Fix GpuSize| +|[#1999](https://github.com/NVIDIA/spark-rapids/pull/1999)|support casting string to decimal| +|[#2006](https://github.com/NVIDIA/spark-rapids/pull/2006)|Enable windowed `collect_list` by default| +|[#2000](https://github.com/NVIDIA/spark-rapids/pull/2000)|Use Spark's HybridRowQueue to avoid MemoryConsumer API shim| +|[#2015](https://github.com/NVIDIA/spark-rapids/pull/2015)|Fix bug where rkey buffer is getting advanced after the first handshake| +|[#2007](https://github.com/NVIDIA/spark-rapids/pull/2007)|Fix unknown column name error when filtering ORC file with no names| +|[#2005](https://github.com/NVIDIA/spark-rapids/pull/2005)|Update to new is_before_spark_311 function name| +|[#1944](https://github.com/NVIDIA/spark-rapids/pull/1944)|Support running scalar pandas UDF with array type.| +|[#1991](https://github.com/NVIDIA/spark-rapids/pull/1991)|Fixes creation of invalid DecimalType in GpuDivide.tagExprForGpu| +|[#1958](https://github.com/NVIDIA/spark-rapids/pull/1958)|Support legacy behavior of parameterless count | +|[#1919](https://github.com/NVIDIA/spark-rapids/pull/1919)|Add support for Structs for UnionExec| +|[#2002](https://github.com/NVIDIA/spark-rapids/pull/2002)|Pass dirs to getBlockData for a wrapped shuffle resolver| +|[#1983](https://github.com/NVIDIA/spark-rapids/pull/1983)|document building against different CUDA 
Toolkit versions| +|[#1994](https://github.com/NVIDIA/spark-rapids/pull/1994)|Merge 0.4 to 0.5 [skip ci]| +|[#1982](https://github.com/NVIDIA/spark-rapids/pull/1982)|Update ORC pushdown filter building to latest Spark logic| +|[#1978](https://github.com/NVIDIA/spark-rapids/pull/1978)|Add audit script to list commits from Spark| +|[#1976](https://github.com/NVIDIA/spark-rapids/pull/1976)|Temp fix for parquet write changes| +|[#1970](https://github.com/NVIDIA/spark-rapids/pull/1970)|add maven profiles for supported CUDA versions| +|[#1951](https://github.com/NVIDIA/spark-rapids/pull/1951)|Branch 0.5 doc remove numpartitions| +|[#1967](https://github.com/NVIDIA/spark-rapids/pull/1967)|Update FAQ for Dataset API and format supported versions| +|[#1972](https://github.com/NVIDIA/spark-rapids/pull/1972)|support GpuSize| +|[#1966](https://github.com/NVIDIA/spark-rapids/pull/1966)|add xml report for codecov| +|[#1955](https://github.com/NVIDIA/spark-rapids/pull/1955)|Fix typo in Arrow optimization config| +|[#1956](https://github.com/NVIDIA/spark-rapids/pull/1956)|Fix NPE in plugin shutdown| +|[#1930](https://github.com/NVIDIA/spark-rapids/pull/1930)|Relax cudf version check for patch-level versions| +|[#1787](https://github.com/NVIDIA/spark-rapids/pull/1787)|support distributed file path in cloud environment| +|[#1961](https://github.com/NVIDIA/spark-rapids/pull/1961)|change premege GPU_TYPE from secret to global env [skip ci]| +|[#1957](https://github.com/NVIDIA/spark-rapids/pull/1957)|Update Spark 3.1.2 shim for float upcast behavior| +|[#1889](https://github.com/NVIDIA/spark-rapids/pull/1889)|Decimal DIV changes | +|[#1947](https://github.com/NVIDIA/spark-rapids/pull/1947)|Move doc of Pandas UDF to additional-functionality| +|[#1938](https://github.com/NVIDIA/spark-rapids/pull/1938)|Add spark.executor.resource.gpu.amount=1 to YARN and K8s docs| +|[#1937](https://github.com/NVIDIA/spark-rapids/pull/1937)|Fix merge conflict with branch-0.4| +|[#1878](https://github.com/NVIDIA/spark-rapids/pull/1878)|spillable cache for GpuCartesianRDD| +|[#1843](https://github.com/NVIDIA/spark-rapids/pull/1843)|Refactor GpuGenerateExec and Explode| +|[#1933](https://github.com/NVIDIA/spark-rapids/pull/1933)|Split DB scripts to make them common for the build and IT pipeline| +|[#1935](https://github.com/NVIDIA/spark-rapids/pull/1935)|Update Alias SQL quoting and float-to-timestamp casting to match Spark 3.2| +|[#1926](https://github.com/NVIDIA/spark-rapids/pull/1926)|Consolidate RAT settings in parent pom| +|[#1918](https://github.com/NVIDIA/spark-rapids/pull/1918)|Minor code cleanup in dateTImeExpressions| +|[#1906](https://github.com/NVIDIA/spark-rapids/pull/1906)|Remove get call on timeZoneId| +|[#1908](https://github.com/NVIDIA/spark-rapids/pull/1908)|Remove the Scala version of Mortgage ETL tests from nightly test| +|[#1894](https://github.com/NVIDIA/spark-rapids/pull/1894)|Modified Download Page to re-order the items and change the format of download links| +|[#1909](https://github.com/NVIDIA/spark-rapids/pull/1909)|Avoid pinned memory for shuffle host buffers| +|[#1891](https://github.com/NVIDIA/spark-rapids/pull/1891)|Connect UCX endpoints early during app startup| +|[#1877](https://github.com/NVIDIA/spark-rapids/pull/1877)|remove docker build in pre-merge [skip ci]| +|[#1830](https://github.com/NVIDIA/spark-rapids/pull/1830)|Enable the tests for collect over window.| +|[#1882](https://github.com/NVIDIA/spark-rapids/pull/1882)|GpuArrowColumnarBatchBuilder retains the references of ArrowBuf until 
HostToGpuCoalesceIterator put them into device| +|[#1868](https://github.com/NVIDIA/spark-rapids/pull/1868)|Increase row limit when doing count() for HostColumnarToGpu | +|[#1855](https://github.com/NVIDIA/spark-rapids/pull/1855)|Expose row count statistics in GpuShuffleExchangeExec| +|[#1875](https://github.com/NVIDIA/spark-rapids/pull/1875)|Fix merge conflict with branch-0.4| +|[#1841](https://github.com/NVIDIA/spark-rapids/pull/1841)|Add in support for DateAddInterval| +|[#1869](https://github.com/NVIDIA/spark-rapids/pull/1869)|Fix tests for Spark 3.2.0 shim| +|[#1858](https://github.com/NVIDIA/spark-rapids/pull/1858)|fix shuffle manager doc on ucx library path| +|[#1836](https://github.com/NVIDIA/spark-rapids/pull/1836)|Add shim for Spark 3.1.2| +|[#1852](https://github.com/NVIDIA/spark-rapids/pull/1852)|Fix Part Suite Tests| +|[#1616](https://github.com/NVIDIA/spark-rapids/pull/1616)|Cost-based optimizer| +|[#1834](https://github.com/NVIDIA/spark-rapids/pull/1834)|Add shim for Spark 3.0.3| +|[#1839](https://github.com/NVIDIA/spark-rapids/pull/1839)|Refactor join code to reduce duplicated code| +|[#1848](https://github.com/NVIDIA/spark-rapids/pull/1848)|Fix merge conflict with branch-0.4| +|[#1796](https://github.com/NVIDIA/spark-rapids/pull/1796)|Have most of range partitioning run on the GPU| +|[#1845](https://github.com/NVIDIA/spark-rapids/pull/1845)|Fix fails on the mortgage ETL test| +|[#1829](https://github.com/NVIDIA/spark-rapids/pull/1829)|Cleanup unused Jenkins files and scripts| +|[#1704](https://github.com/NVIDIA/spark-rapids/pull/1704)|Create a shim for Spark 3.2.0 development| +|[#1838](https://github.com/NVIDIA/spark-rapids/pull/1838)|Make databricks build.sh more convenient for dev| +|[#1835](https://github.com/NVIDIA/spark-rapids/pull/1835)|Fix merge conflict with branch-0.4| +|[#1808](https://github.com/NVIDIA/spark-rapids/pull/1808)|Update mortgage tests to support reading multiple dataset formats| +|[#1822](https://github.com/NVIDIA/spark-rapids/pull/1822)|Fix conflict 0.4 to 0.5| +|[#1807](https://github.com/NVIDIA/spark-rapids/pull/1807)|Fix merge conflict between branch-0.4 and branch-0.5| +|[#1788](https://github.com/NVIDIA/spark-rapids/pull/1788)|Spill metrics everywhere| +|[#1719](https://github.com/NVIDIA/spark-rapids/pull/1719)|Add in out of core sort| +|[#1728](https://github.com/NVIDIA/spark-rapids/pull/1728)|Skip RAPIDS accelerated Java UDF tests if UDF fails to load| +|[#1689](https://github.com/NVIDIA/spark-rapids/pull/1689)|Update docs for plugin 0.5.0-SNAPSHOT and cudf 0.19-SNAPSHOT| +|[#1682](https://github.com/NVIDIA/spark-rapids/pull/1682)|init CI/CD dependencies branch-0.5| + +## Release 0.4.1 + +### Bugs Fixed +||| +|:---|:---| +|[#1985](https://github.com/NVIDIA/spark-rapids/issues/1985)|[BUG] broadcast exchange can fail on 0.4| + +### PRs +||| +|:---|:---| +|[#1995](https://github.com/NVIDIA/spark-rapids/pull/1995)|update changelog 0.4.1 [skip ci]| +|[#1990](https://github.com/NVIDIA/spark-rapids/pull/1990)|Prepare for v0.4.1 release| +|[#1988](https://github.com/NVIDIA/spark-rapids/pull/1988)|broadcast exchange can fail when job group set| + +## Release 0.4 + +### Features +||| +|:---|:---| +|[#1773](https://github.com/NVIDIA/spark-rapids/issues/1773)|[FEA] Spark 3.0.2 release support| +|[#80](https://github.com/NVIDIA/spark-rapids/issues/80)|[FEA] Support the struct SQL function| +|[#76](https://github.com/NVIDIA/spark-rapids/issues/76)|[FEA] Support CreateArray| +|[#1635](https://github.com/NVIDIA/spark-rapids/issues/1635)|[FEA] RAPIDS 
accelerated Java UDF| +|[#1333](https://github.com/NVIDIA/spark-rapids/issues/1333)|[FEA] Support window operations on Decimal| +|[#1419](https://github.com/NVIDIA/spark-rapids/issues/1419)|[FEA] Support GPU accelerated UDF alternative for higher order function "aggregate" over window| +|[#1580](https://github.com/NVIDIA/spark-rapids/issues/1580)|[FEA] Support Decimal for ParquetCachedBatchSerializer| +|[#1600](https://github.com/NVIDIA/spark-rapids/issues/1600)|[FEA] Support ScalarSubquery| +|[#1072](https://github.com/NVIDIA/spark-rapids/issues/1072)|[FEA] Support for a custom DataSource V2 which supplies Arrow data| +|[#906](https://github.com/NVIDIA/spark-rapids/issues/906)|[FEA] Clarify query explanation to directly state what will run on GPU| +|[#1335](https://github.com/NVIDIA/spark-rapids/issues/1335)|[FEA] Support CollectLimitExec for decimal| +|[#1485](https://github.com/NVIDIA/spark-rapids/issues/1485)|[FEA] Decimal Support for Parquet Write| +|[#1329](https://github.com/NVIDIA/spark-rapids/issues/1329)|[FEA] Decimal support for multiply int div, add, subtract and null safe equals| +|[#1351](https://github.com/NVIDIA/spark-rapids/issues/1351)|[FEA] Execute UDFs that provide a RAPIDS execution path| +|[#1330](https://github.com/NVIDIA/spark-rapids/issues/1330)|[FEA] Support Decimal Casts| +|[#1353](https://github.com/NVIDIA/spark-rapids/issues/1353)|[FEA] Example of RAPIDS UDF using custom GPU code| +|[#1487](https://github.com/NVIDIA/spark-rapids/issues/1487)|[FEA] Change spark 3.1.0 to 3.1.1| +|[#1334](https://github.com/NVIDIA/spark-rapids/issues/1334)|[FEA] Add support for count aggregate on decimal| +|[#1325](https://github.com/NVIDIA/spark-rapids/issues/1325)|[FEA] Add in join support for decimal| +|[#1326](https://github.com/NVIDIA/spark-rapids/issues/1326)|[FEA] Add in Broadcast support for decimal values| +|[#37](https://github.com/NVIDIA/spark-rapids/issues/37)|[FEA] round and bround SQL functions| +|[#78](https://github.com/NVIDIA/spark-rapids/issues/78)|[FEA] Support CreateNamedStruct function| +|[#1331](https://github.com/NVIDIA/spark-rapids/issues/1331)|[FEA] UnionExec and ExpandExec support for decimal| +|[#1332](https://github.com/NVIDIA/spark-rapids/issues/1332)|[FEA] Support CaseWhen, Coalesce and IfElse for decimal| +|[#937](https://github.com/NVIDIA/spark-rapids/issues/937)|[FEA] have murmur3 hash function that matches exactly with spark| +|[#1324](https://github.com/NVIDIA/spark-rapids/issues/1324)|[FEA] Support Parquet Read of Decimal FIXED_LENGTH_BYTE_ARRAY| +|[#1428](https://github.com/NVIDIA/spark-rapids/issues/1428)|[FEA] Add support for unary decimal operations abs, floor, ceil, unary - and unary +| +|[#1375](https://github.com/NVIDIA/spark-rapids/issues/1375)|[FEA] Add log statement for what the concurrentGpuTasks tasks is set to on executor startup| +|[#1352](https://github.com/NVIDIA/spark-rapids/issues/1352)|[FEA] Example of RAPIDS UDF using cudf Java APIs| +|[#1328](https://github.com/NVIDIA/spark-rapids/issues/1328)|[FEA] Support sorting and shuffle of decimal| +|[#1316](https://github.com/NVIDIA/spark-rapids/issues/1316)|[FEA] Support simple DECIMAL aggregates| + +### Performance +||| +|:---|:---| +|[#1435](https://github.com/NVIDIA/spark-rapids/issues/1435)|[FEA]Improve the file reading by using local file caching| +|[#1738](https://github.com/NVIDIA/spark-rapids/issues/1738)|[FEA] Reduce regex usage in CAST string to date/timestamp| +|[#987](https://github.com/NVIDIA/spark-rapids/issues/987)|[FEA] Optimize CAST from string to temporal types by 
using cuDF is_timestamp function| +|[#1594](https://github.com/NVIDIA/spark-rapids/issues/1594)|[FEA] RAPIDS accelerated ScalaUDF| +|[#103](https://github.com/NVIDIA/spark-rapids/issues/103)|[FEA] GPU version of TakeOrderedAndProject| +|[#1024](https://github.com/NVIDIA/spark-rapids/issues/1024)|Cleanup RAPIDS transport calls to `receive`| +|[#1366](https://github.com/NVIDIA/spark-rapids/issues/1366)|Seeing performance differences of multi-threaded/coalesce/perfile Parquet reader type for a single file| +|[#1200](https://github.com/NVIDIA/spark-rapids/issues/1200)|[FEA] Accelerate the scan speed for coalescing parquet reader when reading files from multiple partitioned folders| + +### Bugs Fixed +||| +|:---|:---| +|[#1885](https://github.com/NVIDIA/spark-rapids/issues/1885)|[BUG] natural join on string key results in a data frame with spurious NULLs| +|[#1785](https://github.com/NVIDIA/spark-rapids/issues/1785)|[BUG] Rapids pytest integration tests FAILED on Yarn cluster with unrecognized arguments: `--std_input_path=src/test/resources/`| +|[#999](https://github.com/NVIDIA/spark-rapids/issues/999)|[BUG] test_multi_types_window_aggs_for_rows_lead_lag fails against Spark 3.1.0| +|[#1818](https://github.com/NVIDIA/spark-rapids/issues/1818)|[BUG] unmoored doc comment warnings in GpuCast| +|[#1817](https://github.com/NVIDIA/spark-rapids/issues/1817)|[BUG] Developer build with local modifications fails during verify phase| +|[#1644](https://github.com/NVIDIA/spark-rapids/issues/1644)|[BUG] test_window_aggregate_udf_array_from_python fails on databricks| +|[#1771](https://github.com/NVIDIA/spark-rapids/issues/1771)|[BUG] Databricks AWS CI/CD failing to create cluster| +|[#1157](https://github.com/NVIDIA/spark-rapids/issues/1157)|[BUG] Fix regression supporting to_date on GPU with Spark 3.1.0| +|[#716](https://github.com/NVIDIA/spark-rapids/issues/716)|[BUG] Cast String to TimeStamp issues| +|[#1117](https://github.com/NVIDIA/spark-rapids/issues/1117)|[BUG] CAST string to date returns wrong values for dates with out-of-range values| +|[#1670](https://github.com/NVIDIA/spark-rapids/issues/1670)|[BUG] Some TPC-DS queries fail with AQE when decimal types enabled| +|[#1730](https://github.com/NVIDIA/spark-rapids/issues/1730)|[BUG] Range Partitioning can crash when processing is in the order-by| +|[#1726](https://github.com/NVIDIA/spark-rapids/issues/1726)|[BUG] java url decode test failing on databricks, emr, and dataproc| +|[#1651](https://github.com/NVIDIA/spark-rapids/issues/1651)|[BUG] GDS exception when writing shuffle file| +|[#1702](https://github.com/NVIDIA/spark-rapids/issues/1702)|[BUG] check all tests marked xfail for Spark 3.1.1| +|[#575](https://github.com/NVIDIA/spark-rapids/issues/575)|[BUG] Spark 3.1 FAILED join_test.py::test_broadcast_join_mixed[FullOuter][IGNORE_ORDER] failed| +|[#577](https://github.com/NVIDIA/spark-rapids/issues/577)|[BUG] Spark 3.1 log arithmetic functions fail| +|[#1541](https://github.com/NVIDIA/spark-rapids/issues/1541)|[BUG] Tests fail in integration in distributed mode after allowing nested types through in sort and shuffle| +|[#1626](https://github.com/NVIDIA/spark-rapids/issues/1626)|[BUG] TPC-DS-like query 77 at scale=3TB fails with maxResultSize exceeded error| +|[#1576](https://github.com/NVIDIA/spark-rapids/issues/1576)|[BUG] loading SPARK-32639 example parquet file triggers a JVM crash | +|[#1643](https://github.com/NVIDIA/spark-rapids/issues/1643)|[BUG] TPC-DS-Like q10, q35, and q69 - slow or hanging at leftSemiJoin| 
+|[#1650](https://github.com/NVIDIA/spark-rapids/issues/1650)|[BUG] BenchmarkRunner does not include query name in JSON summary filename when running multiple queries| +|[#1654](https://github.com/NVIDIA/spark-rapids/issues/1654)|[BUG] TPC-DS-like query 59 at scale=3TB with AQE fails with join mismatch| +|[#1274](https://github.com/NVIDIA/spark-rapids/issues/1274)|[BUG] OutOfMemoryError - Maximum pool size exceeded while running 24 day criteo ETL Transform stage| +|[#1497](https://github.com/NVIDIA/spark-rapids/issues/1497)|[BUG] Spark-rapids v0.3.0 pytest integration tests with UCX on FAILED on Yarn cluster| +|[#1534](https://github.com/NVIDIA/spark-rapids/issues/1534)|[BUG] Spark 3.1.1 test failure in writing due to removal of InMemoryFileIndex.shouldFilterOut| +|[#1155](https://github.com/NVIDIA/spark-rapids/issues/1155)|[BUG] on shutdown don't print `Socket closed` exception when shutting down UCX.scala| +|[#1510](https://github.com/NVIDIA/spark-rapids/issues/1510)|[BUG] IllegalArgumentException during shuffle| +|[#1513](https://github.com/NVIDIA/spark-rapids/issues/1513)|[BUG] executor not fully initialized may get calls from Spark, in the process setting the `catalog` incorrectly| +|[#1466](https://github.com/NVIDIA/spark-rapids/issues/1466)|[BUG] Databricks build must run before the rapids nightly| +|[#1456](https://github.com/NVIDIA/spark-rapids/issues/1456)|[BUG] Databricks 0.4 parquet integration tests fail| +|[#1400](https://github.com/NVIDIA/spark-rapids/issues/1400)|[BUG] Regressions in spark-shell usage of benchmark utilities| +|[#1119](https://github.com/NVIDIA/spark-rapids/issues/1119)|[BUG] inner join fails with Column size cannot be negative| +|[#1079](https://github.com/NVIDIA/spark-rapids/issues/1079)|[BUG]The Scala UDF function cannot invoke the UDF compiler when it's passed to "explode"| +|[#1298](https://github.com/NVIDIA/spark-rapids/issues/1298)|TPCxBB query16 failed at UnsupportedOperationException: org.apache.parquet.column.values.dictionary.PlainValuesDictionary$PlainIntegerDictionary| +|[#1271](https://github.com/NVIDIA/spark-rapids/issues/1271)|[BUG] CastOpSuite and AnsiCastOpSuite failing with ArithmeticException on Spark 3.1| +|[#84](https://github.com/NVIDIA/spark-rapids/issues/84)|[BUG] sort does not match spark for -0.0 and 0.0| +|[#578](https://github.com/NVIDIA/spark-rapids/issues/578)|[BUG] Spark 3.1 qa_nightly_select_test.py Full join test failures| +|[#586](https://github.com/NVIDIA/spark-rapids/issues/586)|[BUG] Spark3.1 tpch failures| +|[#837](https://github.com/NVIDIA/spark-rapids/issues/837)|[BUG] Distinct count of floating point values differs with regular spark| +|[#953](https://github.com/NVIDIA/spark-rapids/issues/953)|[BUG] 3.1.0 pos_explode tests are failing| +|[#127](https://github.com/NVIDIA/spark-rapids/issues/127)|[BUG] String CSV parsing does not respect nullValues| +|[#1203](https://github.com/NVIDIA/spark-rapids/issues/1203)|[BUG] tpcds query 51 fails with join error on Spark 3.1.0| +|[#750](https://github.com/NVIDIA/spark-rapids/issues/750)|[BUG] udf_cudf_test::test_with_column fails with IPC error | +|[#1348](https://github.com/NVIDIA/spark-rapids/issues/1348)|[BUG] Host columnar decimal conversions are failing| +|[#1270](https://github.com/NVIDIA/spark-rapids/issues/1270)|[BUG] Benchmark runner fails to produce report if benchmark fails due to an invalid query plan| +|[#1179](https://github.com/NVIDIA/spark-rapids/issues/1179)|[BUG] SerializeConcatHostBuffersDeserializeBatch may have thread issues| 
+|[#1115](https://github.com/NVIDIA/spark-rapids/issues/1115)|[BUG] Unchecked type warning in SparkQueryCompareTestSuite| + +### PRs +||| +|:---|:---| +|[#1963](https://github.com/NVIDIA/spark-rapids/pull/1963)|Update changelog 0.4 [skip ci]| +|[#1960](https://github.com/NVIDIA/spark-rapids/pull/1960)|Replace sonatype staging link with maven central link| +|[#1945](https://github.com/NVIDIA/spark-rapids/pull/1945)|Update changelog 0.4 [skip ci]| +|[#1910](https://github.com/NVIDIA/spark-rapids/pull/1910)|Make hash partitioning match CPU| +|[#1927](https://github.com/NVIDIA/spark-rapids/pull/1927)|Change cuDF dependency to 0.18.1| +|[#1934](https://github.com/NVIDIA/spark-rapids/pull/1934)|Update documentation to use cudf version 0.18.1| +|[#1871](https://github.com/NVIDIA/spark-rapids/pull/1871)|Disable coalesce batch spilling to avoid cudf contiguous_split bug| +|[#1849](https://github.com/NVIDIA/spark-rapids/pull/1849)|Update changelog for 0.4| +|[#1744](https://github.com/NVIDIA/spark-rapids/pull/1744)|Fix NullPointerException on null partition insert| +|[#1842](https://github.com/NVIDIA/spark-rapids/pull/1842)|Update to note support for 3.0.2| +|[#1832](https://github.com/NVIDIA/spark-rapids/pull/1832)|Spark 3.1.1 shim no longer a snapshot shim| +|[#1831](https://github.com/NVIDIA/spark-rapids/pull/1831)|Spark 3.0.2 shim no longer a snapshot shim| +|[#1826](https://github.com/NVIDIA/spark-rapids/pull/1826)|Remove benchmarks| +|[#1828](https://github.com/NVIDIA/spark-rapids/pull/1828)|Update cudf dependency to 0.18| +|[#1813](https://github.com/NVIDIA/spark-rapids/pull/1813)|Fix LEAD/LAG failures in Spark 3.1.1| +|[#1819](https://github.com/NVIDIA/spark-rapids/pull/1819)|Fix scaladoc warning in GpuCast| +|[#1820](https://github.com/NVIDIA/spark-rapids/pull/1820)|[BUG] make modified check pre-merge only| +|[#1780](https://github.com/NVIDIA/spark-rapids/pull/1780)|Remove SNAPSHOT from test and integration_test READMEs| +|[#1809](https://github.com/NVIDIA/spark-rapids/pull/1809)|check if modified files after update_config/supported| +|[#1804](https://github.com/NVIDIA/spark-rapids/pull/1804)|Update UCX documentation for RX_QUEUE_LEN and Docker| +|[#1810](https://github.com/NVIDIA/spark-rapids/pull/1810)|Pandas UDF: Sort the data before computing the sum.| +|[#1751](https://github.com/NVIDIA/spark-rapids/pull/1751)|Exclude foldable expressions from GPU if constant folding is disabled| +|[#1798](https://github.com/NVIDIA/spark-rapids/pull/1798)|Add documentation about explain not on GPU when AQE is on| +|[#1766](https://github.com/NVIDIA/spark-rapids/pull/1766)|Branch 0.4 release docs| +|[#1794](https://github.com/NVIDIA/spark-rapids/pull/1794)|Build python output schema from udf expressions| +|[#1783](https://github.com/NVIDIA/spark-rapids/pull/1783)|Fix the collect_list over window tests failures on db| +|[#1781](https://github.com/NVIDIA/spark-rapids/pull/1781)|Better float/double cases for casting tests| +|[#1790](https://github.com/NVIDIA/spark-rapids/pull/1790)|Record row counts in benchmark runs that call collect| +|[#1779](https://github.com/NVIDIA/spark-rapids/pull/1779)|Add support of DateType and TimestampType for GetTimestamp expression| +|[#1768](https://github.com/NVIDIA/spark-rapids/pull/1768)|Updating getting started Databricks docs| +|[#1742](https://github.com/NVIDIA/spark-rapids/pull/1742)|Fix regression supporting to_date with Spark-3.1| +|[#1775](https://github.com/NVIDIA/spark-rapids/pull/1775)|Fix ambiguous ordering for some tests| 
+|[#1760](https://github.com/NVIDIA/spark-rapids/pull/1760)|Update GpuDataSourceScanExec and GpuBroadcastExchangeExec to fix audit issues| +|[#1750](https://github.com/NVIDIA/spark-rapids/pull/1750)|Detect task failures in benchmarks| +|[#1767](https://github.com/NVIDIA/spark-rapids/pull/1767)|Consistent Spark version for test and production| +|[#1741](https://github.com/NVIDIA/spark-rapids/pull/1741)|Reduce regex use in CAST| +|[#1756](https://github.com/NVIDIA/spark-rapids/pull/1756)|Skip RAPIDS accelerated Java UDF tests if UDF fails to load| +|[#1716](https://github.com/NVIDIA/spark-rapids/pull/1716)|Update RapidsShuffleManager documentation for branch 0.4| +|[#1740](https://github.com/NVIDIA/spark-rapids/pull/1740)|Disable ORC writes until bug can be fixed| +|[#1747](https://github.com/NVIDIA/spark-rapids/pull/1747)|Fix resource leaks in unit tests| +|[#1725](https://github.com/NVIDIA/spark-rapids/pull/1725)|Branch 0.4 FAQ reorg| +|[#1718](https://github.com/NVIDIA/spark-rapids/pull/1718)|CAST string to temporal type now calls isTimestamp| +|[#1734](https://github.com/NVIDIA/spark-rapids/pull/1734)|Disable range partitioning if computation is needed| +|[#1723](https://github.com/NVIDIA/spark-rapids/pull/1723)|Removed StructTypes support for ParquetCachedBatchSerializer as cudf doesn't support it yet| +|[#1714](https://github.com/NVIDIA/spark-rapids/pull/1714)|Add support for RAPIDS accelerated Java UDFs| +|[#1713](https://github.com/NVIDIA/spark-rapids/pull/1713)|Call GpuDeviceManager.shutdown when the executor plugin is shutting down| +|[#1596](https://github.com/NVIDIA/spark-rapids/pull/1596)|Added in Decimal support to ParquetCachedBatchSerializer| +|[#1706](https://github.com/NVIDIA/spark-rapids/pull/1706)|cleanup unused is_before_spark_310| +|[#1685](https://github.com/NVIDIA/spark-rapids/pull/1685)|Fix CustomShuffleReader replacement when decimal types enabled| +|[#1699](https://github.com/NVIDIA/spark-rapids/pull/1699)|Add docs about Spark 3.1 in standalone modes not needing extra class path| +|[#1701](https://github.com/NVIDIA/spark-rapids/pull/1701)|remove xfail for orc test_input_meta for spark 3.1.0| +|[#1703](https://github.com/NVIDIA/spark-rapids/pull/1703)|Remove xfail for spark 3.1.0 test_broadcast_join_mixed FullOuter| +|[#1676](https://github.com/NVIDIA/spark-rapids/pull/1676)|BenchmarkRunner option to generate query plan diagrams in DOT format| +|[#1695](https://github.com/NVIDIA/spark-rapids/pull/1695)|support alternate jar paths| +|[#1694](https://github.com/NVIDIA/spark-rapids/pull/1694)|increase mem and limit parallelism for pre-merge| +|[#1691](https://github.com/NVIDIA/spark-rapids/pull/1691)|add validate_execs_in_gpu_plan to pytest.ini| +|[#1692](https://github.com/NVIDIA/spark-rapids/pull/1692)|Add the integration test resources to the test tarball| +|[#1677](https://github.com/NVIDIA/spark-rapids/pull/1677)|When PTDS is enabled, print warning if the allocator is not ARENA| +|[#1683](https://github.com/NVIDIA/spark-rapids/pull/1683)|update changelog to verify autotmerge 0.5 setup [skip ci]| +|[#1673](https://github.com/NVIDIA/spark-rapids/pull/1673)|support auto-merge for branch 0.5 [skip ci]| +|[#1681](https://github.com/NVIDIA/spark-rapids/pull/1681)|Xfail the collect_list tests for databricks| +|[#1678](https://github.com/NVIDIA/spark-rapids/pull/1678)|Fix array/struct checks in Sort and HashAggregate and sorting tests in distributed mode| +|[#1671](https://github.com/NVIDIA/spark-rapids/pull/1671)|Allow metrics to be configurable by level| 
+|[#1675](https://github.com/NVIDIA/spark-rapids/pull/1675)|add run_pyspark_from_build.sh to the pytest distribution tarball| +|[#1548](https://github.com/NVIDIA/spark-rapids/pull/1548)|Support executing collect_list on GPU with windowing.| +|[#1593](https://github.com/NVIDIA/spark-rapids/pull/1593)|Avoid unnecessary Table instances after contiguous split| +|[#1592](https://github.com/NVIDIA/spark-rapids/pull/1592)|Add in support for Decimal divide| +|[#1668](https://github.com/NVIDIA/spark-rapids/pull/1668)|Implement way for python integration tests to validate Exec is in GPU plan| +|[#1669](https://github.com/NVIDIA/spark-rapids/pull/1669)|Add FAQ entries for executor-per-GPU questions| +|[#1661](https://github.com/NVIDIA/spark-rapids/pull/1661)|Enable Parquet test for file containing map struct key| +|[#1664](https://github.com/NVIDIA/spark-rapids/pull/1664)|Filter nulls for left semi and left anti join to work around cudf| +|[#1665](https://github.com/NVIDIA/spark-rapids/pull/1665)|Add better automated tests for Arrow columnar copy in HostColumnarToGpu| +|[#1614](https://github.com/NVIDIA/spark-rapids/pull/1614)|add alluxio getting start document| +|[#1639](https://github.com/NVIDIA/spark-rapids/pull/1639)|support GpuScalarSubquery| +|[#1656](https://github.com/NVIDIA/spark-rapids/pull/1656)|Move UDF to Catalyst Expressions to its own document| +|[#1663](https://github.com/NVIDIA/spark-rapids/pull/1663)|BenchmarkRunner - Include query name in JSON summary filename| +|[#1655](https://github.com/NVIDIA/spark-rapids/pull/1655)|Fix extraneous shuffles added by AQE| +|[#1652](https://github.com/NVIDIA/spark-rapids/pull/1652)|Fix typo in arrow optimized config name - spark.rapids.arrowCopyOptimizationEnabled| +|[#1645](https://github.com/NVIDIA/spark-rapids/pull/1645)|Run Databricks IT with python-xdist parallel, includes test fixes and xfail| +|[#1649](https://github.com/NVIDIA/spark-rapids/pull/1649)|Move building from source docs to contributing guide| +|[#1637](https://github.com/NVIDIA/spark-rapids/pull/1637)|Fail DivModLike on zero divisor in ANSI mode| +|[#1646](https://github.com/NVIDIA/spark-rapids/pull/1646)|Update links in rapids-udfs.md after moving to subfolder| +|[#1641](https://github.com/NVIDIA/spark-rapids/pull/1641)|Xfail struct and array order by tests on Dataproc| +|[#1565](https://github.com/NVIDIA/spark-rapids/pull/1565)|Add GPU accelerated array_contains operator| +|[#1617](https://github.com/NVIDIA/spark-rapids/pull/1617)|Enable nightly test checks for Apache Spark| +|[#1636](https://github.com/NVIDIA/spark-rapids/pull/1636)|RAPIDS accelerated Spark Scala UDF support| +|[#1634](https://github.com/NVIDIA/spark-rapids/pull/1634)|Fix databricks build since Arrow code added| +|[#1599](https://github.com/NVIDIA/spark-rapids/pull/1599)|Add division by zero tests for Spark 3.1 behavior| +|[#1619](https://github.com/NVIDIA/spark-rapids/pull/1619)|Update GpuFileSourceScanExec to be in sync with DataSourceScanExec| +|[#1631](https://github.com/NVIDIA/spark-rapids/pull/1631)|Explicitly add maven-jar-plugin version to improve incremental build time.| +|[#1624](https://github.com/NVIDIA/spark-rapids/pull/1624)|Update explain format to show what will and will not run on the GPU| +|[#1622](https://github.com/NVIDIA/spark-rapids/pull/1622)|Support faster copy for a custom DataSource V2 which supplies Arrow data| +|[#1621](https://github.com/NVIDIA/spark-rapids/pull/1621)|Additional functionality docs| +|[#1618](https://github.com/NVIDIA/spark-rapids/pull/1618)|update blossom-ci for 
security updates [skip ci]| +|[#1562](https://github.com/NVIDIA/spark-rapids/pull/1562)|add alluxio support| +|[#1597](https://github.com/NVIDIA/spark-rapids/pull/1597)|Documentation for Parquet serializer| +|[#1611](https://github.com/NVIDIA/spark-rapids/pull/1611)|Add in flag for integration tests to not skip required tests| +|[#1609](https://github.com/NVIDIA/spark-rapids/pull/1609)|Disable float round/bround by default| +|[#1615](https://github.com/NVIDIA/spark-rapids/pull/1615)|Add in window support for average| +|[#1610](https://github.com/NVIDIA/spark-rapids/pull/1610)|Limit length of spark app name in BenchmarkRunner| +|[#1579](https://github.com/NVIDIA/spark-rapids/pull/1579)|Support TakeOrderedAndProject| +|[#1581](https://github.com/NVIDIA/spark-rapids/pull/1581)|Support Decimal type for CollectLimitExec| +|[#1591](https://github.com/NVIDIA/spark-rapids/pull/1591)|Add support for running multiple queries in BenchmarkRunner| +|[#1595](https://github.com/NVIDIA/spark-rapids/pull/1595)|Fix Github documentation issue template| +|[#1577](https://github.com/NVIDIA/spark-rapids/pull/1577)|rename directory from spark310 to spark311| +|[#1578](https://github.com/NVIDIA/spark-rapids/pull/1578)|Test to track RAPIDS-side issues re SPARK-32639| +|[#1583](https://github.com/NVIDIA/spark-rapids/pull/1583)|fix request-action issue [skip ci]| +|[#1555](https://github.com/NVIDIA/spark-rapids/pull/1555)|Enable ANSI mode for CAST string to timestamp| +|[#1531](https://github.com/NVIDIA/spark-rapids/pull/1531)|Decimal Support for writing Parquet| +|[#1545](https://github.com/NVIDIA/spark-rapids/pull/1545)|Support comparing ORC data| +|[#1570](https://github.com/NVIDIA/spark-rapids/pull/1570)|Branch 0.4 doc cleanup| +|[#1569](https://github.com/NVIDIA/spark-rapids/pull/1569)|Add shim method shouldIgnorePath| +|[#1564](https://github.com/NVIDIA/spark-rapids/pull/1564)|Add in support for Decimal Multiply and DIV| +|[#1561](https://github.com/NVIDIA/spark-rapids/pull/1561)|Decimal support for add and subtract| +|[#1560](https://github.com/NVIDIA/spark-rapids/pull/1560)|support sum in window aggregation for decimal| +|[#1546](https://github.com/NVIDIA/spark-rapids/pull/1546)|Cleanup shutdown logging for UCX shuffle| +|[#1551](https://github.com/NVIDIA/spark-rapids/pull/1551)|RAPIDS-accelerated Hive UDFs support all types| +|[#1543](https://github.com/NVIDIA/spark-rapids/pull/1543)|Shuffle/transport enabled by default| +|[#1552](https://github.com/NVIDIA/spark-rapids/pull/1552)|Disable blackduck signature check| +|[#1540](https://github.com/NVIDIA/spark-rapids/pull/1540)|Handle ShuffleManager api calls when plugin is not fully initialized| +|[#1547](https://github.com/NVIDIA/spark-rapids/pull/1547)|Cleanup shuffle transport receive calls| +|[#1512](https://github.com/NVIDIA/spark-rapids/pull/1512)|Support window operations on Decimal| +|[#1532](https://github.com/NVIDIA/spark-rapids/pull/1532)|Support casting from decimal to decimal| +|[#1542](https://github.com/NVIDIA/spark-rapids/pull/1542)|Change the number of partitions to zero when a range is empty| +|[#1506](https://github.com/NVIDIA/spark-rapids/pull/1506)|Add --use-decimals flag to TPC-DS ConvertFiles| +|[#1511](https://github.com/NVIDIA/spark-rapids/pull/1511)|Remove unused Jenkinsfiles [skip ci]| +|[#1505](https://github.com/NVIDIA/spark-rapids/pull/1505)|Add least, greatest and eqNullSafe support for DecimalType| +|[#1484](https://github.com/NVIDIA/spark-rapids/pull/1484)|add doc for nsight systems bundled with cuda toolkit| 
+|[#1478](https://github.com/NVIDIA/spark-rapids/pull/1478)|Documentation for RAPIDS-accelerated Hive UDFs| +|[#1477](https://github.com/NVIDIA/spark-rapids/pull/1477)|Allow structs and arrays to pass through for Shuffle and Sort | +|[#1489](https://github.com/NVIDIA/spark-rapids/pull/1489)|Adds in some support for the array sql function| +|[#1438](https://github.com/NVIDIA/spark-rapids/pull/1438)|Cast from numeric types to decimal type| +|[#1493](https://github.com/NVIDIA/spark-rapids/pull/1493)|Moved ParquetRecordMaterializer to the shim package to follow convention| +|[#1495](https://github.com/NVIDIA/spark-rapids/pull/1495)|Fix merge conflict, merge branch 0.3 to branch 0.4 [skip ci]| +|[#1472](https://github.com/NVIDIA/spark-rapids/pull/1472)|Add an example RAPIDS-accelerated Hive UDF using native code| +|[#1488](https://github.com/NVIDIA/spark-rapids/pull/1488)|Rename Spark 3.1.0 shim to Spark 3.1.1 to match community| +|[#1474](https://github.com/NVIDIA/spark-rapids/pull/1474)|Fix link| +|[#1476](https://github.com/NVIDIA/spark-rapids/pull/1476)|DecimalType support for Aggregate Count| +|[#1475](https://github.com/NVIDIA/spark-rapids/pull/1475)| Join support for DecimalType| +|[#1244](https://github.com/NVIDIA/spark-rapids/pull/1244)|Support round and bround SQL functions | +|[#1458](https://github.com/NVIDIA/spark-rapids/pull/1458)|Add in support for struct and named_struct| +|[#1465](https://github.com/NVIDIA/spark-rapids/pull/1465)|DecimalType support for UnionExec and ExpandExec| +|[#1450](https://github.com/NVIDIA/spark-rapids/pull/1450)|Add dynamic configs for the spark-rapids IT pipelines| +|[#1207](https://github.com/NVIDIA/spark-rapids/pull/1207)|Spark SQL hash function using murmur3| +|[#1457](https://github.com/NVIDIA/spark-rapids/pull/1457)|Support reading decimal columns from parquet files on Databricks| +|[#1455](https://github.com/NVIDIA/spark-rapids/pull/1455)|Upgrade Scala Maven Plugin to 4.3.0| +|[#1453](https://github.com/NVIDIA/spark-rapids/pull/1453)|DecimalType support for IfElse and Coalesce| +|[#1452](https://github.com/NVIDIA/spark-rapids/pull/1452)|Support DecimalType for CaseWhen| +|[#1444](https://github.com/NVIDIA/spark-rapids/pull/1444)|Improve UX when running benchmarks from Spark shell| +|[#1294](https://github.com/NVIDIA/spark-rapids/pull/1294)|Support reading decimal columns from parquet files| +|[#1153](https://github.com/NVIDIA/spark-rapids/pull/1153)|Scala UDF will compile children expressions in Project| +|[#1416](https://github.com/NVIDIA/spark-rapids/pull/1416)|Optimize mvn dependency download scripts| +|[#1430](https://github.com/NVIDIA/spark-rapids/pull/1430)|Add project for testing code that requires Spark 3.1.0 or later| +|[#1425](https://github.com/NVIDIA/spark-rapids/pull/1425)|Add in Decimal support for abs, floor, ceil, unary - and unary +| +|[#1427](https://github.com/NVIDIA/spark-rapids/pull/1427)|Revert "Make the multi-threaded parquet reader the default"| +|[#1420](https://github.com/NVIDIA/spark-rapids/pull/1420)|Add udf jar to nightly integration tests| +|[#1422](https://github.com/NVIDIA/spark-rapids/pull/1422)|Log the number of concurrent gpu tasks allowed on Executor startup| +|[#1401](https://github.com/NVIDIA/spark-rapids/pull/1401)|Accelerate the coalescing parquet reader when reading files from multiple partitioned folders| +|[#1413](https://github.com/NVIDIA/spark-rapids/pull/1413)|Add config for cast float to integral types| +|[#1313](https://github.com/NVIDIA/spark-rapids/pull/1313)|Support spilling to disk directly via 
cuFile/GDS| +|[#1411](https://github.com/NVIDIA/spark-rapids/pull/1411)|Add udf-examples jar to databricks build| +|[#1412](https://github.com/NVIDIA/spark-rapids/pull/1412)|Fix a lot of tests marked with xfail for Spark 3.1.0 that no longer fail| +|[#1414](https://github.com/NVIDIA/spark-rapids/pull/1414)|Build merged code of HEAD and BASE branch for pre-merge [skip ci]| +|[#1409](https://github.com/NVIDIA/spark-rapids/pull/1409)|Add option to use decimals in tpc-ds csv to parquet conversion| +|[#1410](https://github.com/NVIDIA/spark-rapids/pull/1410)|Add Decimal support for In, InSet, AtLeastNNonNulls, GetArrayItem, GetStructField, and GenerateExec| +|[#1408](https://github.com/NVIDIA/spark-rapids/pull/1408)|Support RAPIDS-accelerated HiveGenericUDF| +|[#1407](https://github.com/NVIDIA/spark-rapids/pull/1407)|Update docs and tests for null CSV support| +|[#1393](https://github.com/NVIDIA/spark-rapids/pull/1393)|Support RAPIDS-accelerated HiveSimpleUDF| +|[#1392](https://github.com/NVIDIA/spark-rapids/pull/1392)|Turn on hash partitioning for decimal support| +|[#1402](https://github.com/NVIDIA/spark-rapids/pull/1402)|Better GPU Cast type checks| +|[#1404](https://github.com/NVIDIA/spark-rapids/pull/1404)|Fix branch 0.4 merge conflict| +|[#1323](https://github.com/NVIDIA/spark-rapids/pull/1323)|More advanced type checking and documentation| +|[#1391](https://github.com/NVIDIA/spark-rapids/pull/1391)|Remove extra null join filtering because cudf is fast for this now.| +|[#1395](https://github.com/NVIDIA/spark-rapids/pull/1395)|Fix branch-0.3 -> branch-0.4 automerge| +|[#1382](https://github.com/NVIDIA/spark-rapids/pull/1382)|Handle "MM[/-]dd" and "dd[/-]MM" datetime formats in UnixTimeExprMeta| +|[#1390](https://github.com/NVIDIA/spark-rapids/pull/1390)|Accelerated columnar to row/row to columnar for decimal| +|[#1380](https://github.com/NVIDIA/spark-rapids/pull/1380)|Adds in basic support for decimal sort, sum, and some shuffle| +|[#1367](https://github.com/NVIDIA/spark-rapids/pull/1367)|Reuse gpu expression conversion rules when checking sort order| +|[#1349](https://github.com/NVIDIA/spark-rapids/pull/1349)|Add canonicalization tests| +|[#1368](https://github.com/NVIDIA/spark-rapids/pull/1368)|Move to cudf 0.18-SNAPSHOT| +|[#1361](https://github.com/NVIDIA/spark-rapids/pull/1361)|Use the correct precision when reading spark columnar data.| +|[#1273](https://github.com/NVIDIA/spark-rapids/pull/1273)|Update docs and scripts to 0.4.0-SNAPSHOT| +|[#1321](https://github.com/NVIDIA/spark-rapids/pull/1321)|Refactor to stop inheriting from HashJoin| +|[#1311](https://github.com/NVIDIA/spark-rapids/pull/1311)|ParquetCachedBatchSerializer code cleanup| +|[#1303](https://github.com/NVIDIA/spark-rapids/pull/1303)|Add explicit outputOrdering for BHJ and SHJ in spark310 shim| +|[#1299](https://github.com/NVIDIA/spark-rapids/pull/1299)|Benchmark runner improved error handling| + +## Release 0.3 + +### Features +||| +|:---|:---| +|[#1002](https://github.com/NVIDIA/spark-rapids/issues/1002)|[FEA] RapidsHostColumnVectorCore should verify cudf data with respect to the expected spark type | +|[#444](https://github.com/NVIDIA/spark-rapids/issues/444)|[FEA] Plugable Cache| +|[#1158](https://github.com/NVIDIA/spark-rapids/issues/1158)|[FEA] Better documentation on type support| +|[#57](https://github.com/NVIDIA/spark-rapids/issues/57)|[FEA] Support INT96 for parquet reads and writes| +|[#1003](https://github.com/NVIDIA/spark-rapids/issues/1003)|[FEA] Reduce overlap between RapidsHostColumnVector and 
RapidsHostColumnVectorCore| +|[#913](https://github.com/NVIDIA/spark-rapids/issues/913)|[FEA] In Pluggable Cache Support CalendarInterval while creating CachedBatches| +|[#1092](https://github.com/NVIDIA/spark-rapids/issues/1092)|[FEA] In Pluggable Cache handle nested types having CalendarIntervalType and NullType| +|[#670](https://github.com/NVIDIA/spark-rapids/issues/670)|[FEA] Support NullType| +|[#50](https://github.com/NVIDIA/spark-rapids/issues/50)|[FEA] support `spark.sql.legacy.timeParserPolicy`| +|[#1144](https://github.com/NVIDIA/spark-rapids/issues/1144)|[FEA] Remove Databricks 3.0.0 shim layer| +|[#1096](https://github.com/NVIDIA/spark-rapids/issues/1096)|[FEA] Implement parquet CreateDataSourceTableAsSelectCommand| +|[#688](https://github.com/NVIDIA/spark-rapids/issues/688)|[FEA] udf compiler should be auto-appended to `spark.sql.extensions`| +|[#502](https://github.com/NVIDIA/spark-rapids/issues/502)|[FEA] Support Databricks 7.3 LTS Runtime| +|[#764](https://github.com/NVIDIA/spark-rapids/issues/764)|[FEA] Sanity checks for cudf jar mismatch| +|[#1018](https://github.com/NVIDIA/spark-rapids/issues/1018)|[FEA] Log details related to GPU memory fragmentation on GPU OOM| +|[#619](https://github.com/NVIDIA/spark-rapids/issues/619)|[FEA] log whether libcudf and libcudfjni were built for PTDS| +|[#905](https://github.com/NVIDIA/spark-rapids/issues/905)|[FEA] create AWS EMR 3.0.1 shim| +|[#838](https://github.com/NVIDIA/spark-rapids/issues/838)|[FEA] Support window count for a column| +|[#864](https://github.com/NVIDIA/spark-rapids/issues/864)|[FEA] config option to enable RMM arena memory resource| +|[#430](https://github.com/NVIDIA/spark-rapids/issues/430)|[FEA] Audit: Parquet Writer support for TIMESTAMP_MILLIS| +|[#818](https://github.com/NVIDIA/spark-rapids/issues/818)|[FEA] Create shim layer for AWS EMR | +|[#608](https://github.com/NVIDIA/spark-rapids/issues/608)|[FEA] Parquet small file optimization improve handle merge schema| + +### Performance +||| +|:---|:---| +|[#446](https://github.com/NVIDIA/spark-rapids/issues/446)|[FEA] Test jucx in 1.9.x branch| +|[#1038](https://github.com/NVIDIA/spark-rapids/issues/1038)|[FEA] Accelerate the data transfer for plan `WindowInPandasExec`| +|[#533](https://github.com/NVIDIA/spark-rapids/issues/533)|[FEA] Improve PTDS performance| +|[#849](https://github.com/NVIDIA/spark-rapids/issues/849)|[FEA] Have GpuColumnarBatchSerializer return GpuColumnVectorFromBuffer instances| +|[#784](https://github.com/NVIDIA/spark-rapids/issues/784)|[FEA] Allow Host Spilling to be more dynamic| +|[#627](https://github.com/NVIDIA/spark-rapids/issues/627)|[FEA] Further parquet reading small file improvements| +|[#5](https://github.com/NVIDIA/spark-rapids/issues/5)|[FEA] Support Adaptive Execution| + +### Bugs Fixed +||| +|:---|:---| +|[#1423](https://github.com/NVIDIA/spark-rapids/issues/1423)|[BUG] Mortgage ETL sample failed with spark.sql.adaptive enabled on AWS EMR 6.2 | +|[#1369](https://github.com/NVIDIA/spark-rapids/issues/1369)|[BUG] TPC-DS Query Failing on EMR 6.2 with AQE| +|[#1344](https://github.com/NVIDIA/spark-rapids/issues/1344)|[BUG] Spark-rapids Pytests failed on On Databricks cluster spark standalone mode| +|[#1279](https://github.com/NVIDIA/spark-rapids/issues/1279)|[BUG] TPC-DS query 2 failing with NPE| +|[#1280](https://github.com/NVIDIA/spark-rapids/issues/1280)|[BUG] TPC-DS query 93 failing with UnsupportedOperationException| +|[#1308](https://github.com/NVIDIA/spark-rapids/issues/1308)|[BUG] TPC-DS query 14a runs much slower on 0.3| 
+|[#1284](https://github.com/NVIDIA/spark-rapids/issues/1284)|[BUG] TPC-DS query 77 at scale=1TB fails with maxResultSize exceeded error| +|[#1061](https://github.com/NVIDIA/spark-rapids/issues/1061)|[BUG] orc_test.py is failing| +|[#1197](https://github.com/NVIDIA/spark-rapids/issues/1197)|[BUG] java.lang.NullPointerException when exporting delta table| +|[#685](https://github.com/NVIDIA/spark-rapids/issues/685)|[BUG] In ParqueCachedBatchSerializer, serializing parquet buffers might blow up in certain cases| +|[#1269](https://github.com/NVIDIA/spark-rapids/issues/1269)|[BUG] GpuSubstring is not expected to be a part of a SortOrder| +|[#1246](https://github.com/NVIDIA/spark-rapids/issues/1246)|[BUG] Many TPC-DS benchmarks fail when writing to Parquet| +|[#961](https://github.com/NVIDIA/spark-rapids/issues/961)|[BUG] ORC predicate pushdown should work with case-insensitive analysis| +|[#962](https://github.com/NVIDIA/spark-rapids/issues/962)|[BUG] Loading columns from an ORC file without column names returns no data| +|[#1245](https://github.com/NVIDIA/spark-rapids/issues/1245)|[BUG] Code adding buffers to the spillable store should synchronize| +|[#570](https://github.com/NVIDIA/spark-rapids/issues/570)|[BUG] Continue debugging OOM after ensuring device store is empty| +|[#972](https://github.com/NVIDIA/spark-rapids/issues/972)|[BUG] total time metric is redundant with scan time| +|[#1039](https://github.com/NVIDIA/spark-rapids/issues/1039)|[BUG] UNBOUNDED window ranges on null timestamp columns produces incorrect results.| +|[#1195](https://github.com/NVIDIA/spark-rapids/issues/1195)|[BUG] AcceleratedColumnarToRowIterator queue empty| +|[#1177](https://github.com/NVIDIA/spark-rapids/issues/1177)|[BUG] leaks possible in the rapids shuffle if batches are received after the task completes| +|[#1216](https://github.com/NVIDIA/spark-rapids/issues/1216)|[BUG] Failure to recognize ORC file format when loaded via Hive| +|[#898](https://github.com/NVIDIA/spark-rapids/issues/898)|[BUG] count reductions are failing on databricks because lack for Complete support| +|[#1184](https://github.com/NVIDIA/spark-rapids/issues/1184)|[BUG] test_window_aggregate_udf_array_from_python fails on databricks 3.0.1| +|[#1151](https://github.com/NVIDIA/spark-rapids/issues/1151)|[BUG]Add databricks 3.0.1 shim layer for GpuWindowInPandasExec.| +|[#1199](https://github.com/NVIDIA/spark-rapids/issues/1199)|[BUG] No data size in Input column in Stages page from Spark UI when using Parquet as file source| +|[#1031](https://github.com/NVIDIA/spark-rapids/issues/1031)|[BUG] dependency info properties file contains error messages| +|[#1149](https://github.com/NVIDIA/spark-rapids/issues/1149)|[BUG] Scaladoc warnings in GpuDataSource| +|[#1185](https://github.com/NVIDIA/spark-rapids/issues/1185)|[BUG] test_hash_multiple_mode_query failing| +|[#724](https://github.com/NVIDIA/spark-rapids/issues/724)|[BUG] PySpark test_broadcast_nested_loop_join_special_case intermittent failure| +|[#1164](https://github.com/NVIDIA/spark-rapids/issues/1164)|[BUG] ansi_cast tests are failing in 3.1.0| +|[#1110](https://github.com/NVIDIA/spark-rapids/issues/1110)|[BUG] Special date "now" has wrong value on GPU| +|[#1139](https://github.com/NVIDIA/spark-rapids/issues/1139)|[BUG] Host columnar to GPU can be very slow| +|[#1094](https://github.com/NVIDIA/spark-rapids/issues/1094)|[BUG] unix_timestamp on GPU returns invalid data for special dates| +|[#1098](https://github.com/NVIDIA/spark-rapids/issues/1098)|[BUG] unix_timestamp on GPU returns invalid 
data for bad input| +|[#1082](https://github.com/NVIDIA/spark-rapids/issues/1082)|[BUG] string to timestamp conversion fails with split| +|[#1140](https://github.com/NVIDIA/spark-rapids/issues/1140)|[BUG] ConcurrentModificationException error after scala test suite completes| +|[#1073](https://github.com/NVIDIA/spark-rapids/issues/1073)|[BUG] java.lang.RuntimeException: BinaryExpressions must override either eval or nullSafeEval| +|[#975](https://github.com/NVIDIA/spark-rapids/issues/975)|[BUG] BroadcastExchangeExec fails to fall back to CPU on driver node on GCP Dataproc| +|[#773](https://github.com/NVIDIA/spark-rapids/issues/773)|[BUG] Investigate high task deserialization| +|[#1035](https://github.com/NVIDIA/spark-rapids/issues/1035)|[BUG] TPC-DS query 90 with AQE enabled fails with doExecuteBroadcast exception| +|[#825](https://github.com/NVIDIA/spark-rapids/issues/825)|[BUG] test_window_aggs_for_ranges intermittently fails| +|[#1008](https://github.com/NVIDIA/spark-rapids/issues/1008)|[BUG] limit function is producing inconsistent result when type is Byte, Long, Boolean and Timestamp| +|[#996](https://github.com/NVIDIA/spark-rapids/issues/996)|[BUG] TPC-DS benchmark via spark-submit does not provide option to disable appending .dat to path| +|[#1006](https://github.com/NVIDIA/spark-rapids/issues/1006)|[BUG] Spark3.1.0 changed BasicWriteTaskStats breaks BasicColumnarWriteTaskStatsTracker| +|[#985](https://github.com/NVIDIA/spark-rapids/issues/985)|[BUG] missing metric `dataSize`| +|[#881](https://github.com/NVIDIA/spark-rapids/issues/881)|[BUG] cannot disable Sort by itself| +|[#812](https://github.com/NVIDIA/spark-rapids/issues/812)|[BUG] Test failures for 0.2 when run with multiple executors| +|[#925](https://github.com/NVIDIA/spark-rapids/issues/925)|[BUG]Range window-functions with non-timestamp order-by expressions not falling back to CPU| +|[#852](https://github.com/NVIDIA/spark-rapids/issues/852)|[BUG] BenchUtils.compareResults cannot compare partitioned files when ignoreOrdering=false| +|[#868](https://github.com/NVIDIA/spark-rapids/issues/868)|[BUG] Rounding error when casting timestamp to string for timestamps before 1970| +|[#880](https://github.com/NVIDIA/spark-rapids/issues/880)|[BUG] doing a window operation with an orderby for a single constant crashes| +|[#776](https://github.com/NVIDIA/spark-rapids/issues/776)|[BUG] Integration test fails on spark 3.1.0-SNAPSHOT| +|[#874](https://github.com/NVIDIA/spark-rapids/issues/874)|[BUG] `RapidsConf.scala` has some un-consistency for `spark.rapids.sql.format.parquet.multiThreadedRead`| +|[#860](https://github.com/NVIDIA/spark-rapids/issues/860)|[BUG] we need to mark columns from received shuffle buffers as `GpuColumnVectorFromBuffer`| +|[#122](https://github.com/NVIDIA/spark-rapids/issues/122)|[BUG] CSV Timestamp parseing is broken for TS < 1902 and TS > 2038| +|[#810](https://github.com/NVIDIA/spark-rapids/issues/810)|[BUG] UDF Integration tests fail if pandas is not installed| +|[#746](https://github.com/NVIDIA/spark-rapids/issues/746)|[BUG] cudf_udf_test.py is flakey| +|[#811](https://github.com/NVIDIA/spark-rapids/issues/811)|[BUG] 0.3 nightly is timing out | +|[#574](https://github.com/NVIDIA/spark-rapids/issues/574)|[BUG] Fix GpuTimeSub for Spark 3.1.0| + +### PRs +||| +|:---|:---| +|[#1496](https://github.com/NVIDIA/spark-rapids/pull/1496)|Update changelog for v0.3.0 release [skip ci]| +|[#1473](https://github.com/NVIDIA/spark-rapids/pull/1473)|Update documentation for 0.3 release| 
+|[#1371](https://github.com/NVIDIA/spark-rapids/pull/1371)|Start Guide for RAPIDS on AWS EMR 6.2| +|[#1446](https://github.com/NVIDIA/spark-rapids/pull/1446)|Update changelog for 0.3.0 release [skip ci]| +|[#1439](https://github.com/NVIDIA/spark-rapids/pull/1439)|when AQE enabled we fail to fix up exchanges properly and EMR| +|[#1433](https://github.com/NVIDIA/spark-rapids/pull/1433)|fix pandas 1.2 compatible issue| +|[#1424](https://github.com/NVIDIA/spark-rapids/pull/1424)|Make the multi-threaded parquet reader the default since coalescing doesn't handle partitioned files well| +|[#1389](https://github.com/NVIDIA/spark-rapids/pull/1389)|Update project version to 0.3.0| +|[#1387](https://github.com/NVIDIA/spark-rapids/pull/1387)|Update cudf version to 0.17| +|[#1370](https://github.com/NVIDIA/spark-rapids/pull/1370)|[REVIEW] init changelog 0.3 [skip ci]| +|[#1376](https://github.com/NVIDIA/spark-rapids/pull/1376)|MetaUtils.getBatchFromMeta should return batches with GpuColumnVectorFromBuffer| +|[#1358](https://github.com/NVIDIA/spark-rapids/pull/1358)|auto-merge: instant merge after creation [skip ci]| +|[#1359](https://github.com/NVIDIA/spark-rapids/pull/1359)|Use SortOrder from shims.| +|[#1343](https://github.com/NVIDIA/spark-rapids/pull/1343)|Do not run UDFs when the partition is empty.| +|[#1342](https://github.com/NVIDIA/spark-rapids/pull/1342)|Fix and edit docs for standalone mode| +|[#1350](https://github.com/NVIDIA/spark-rapids/pull/1350)|fix GpuRangePartitioning canonicalization| +|[#1281](https://github.com/NVIDIA/spark-rapids/pull/1281)|Documentation added for testing| +|[#1336](https://github.com/NVIDIA/spark-rapids/pull/1336)|Fix missing post-shuffle coalesce with AQE| +|[#1318](https://github.com/NVIDIA/spark-rapids/pull/1318)|Fix copying GpuFileSourceScanExec node| +|[#1337](https://github.com/NVIDIA/spark-rapids/pull/1337)|Use UTC instead of GMT| +|[#1307](https://github.com/NVIDIA/spark-rapids/pull/1307)|Fallback to cpu when reading Delta log files for stats| +|[#1310](https://github.com/NVIDIA/spark-rapids/pull/1310)|Fix canonicalization of GpuFileSourceScanExec, GpuShuffleCoalesceExec| +|[#1302](https://github.com/NVIDIA/spark-rapids/pull/1302)|Add GpuSubstring handling to SortOrder canonicalization| +|[#1265](https://github.com/NVIDIA/spark-rapids/pull/1265)|Chunking input before writing a ParquetCachedBatch| +|[#1278](https://github.com/NVIDIA/spark-rapids/pull/1278)|Add a config to disable decimal types by default| +|[#1272](https://github.com/NVIDIA/spark-rapids/pull/1272)|Add Alias to shims| +|[#1268](https://github.com/NVIDIA/spark-rapids/pull/1268)|Adds in support docs for 0.3 release| +|[#1235](https://github.com/NVIDIA/spark-rapids/pull/1235)|Trigger reading and handling control data.| +|[#1266](https://github.com/NVIDIA/spark-rapids/pull/1266)|Updating Databricks getting started for 0.3 release| +|[#1291](https://github.com/NVIDIA/spark-rapids/pull/1291)|Increase pre-merge resource requests [skip ci]| +|[#1275](https://github.com/NVIDIA/spark-rapids/pull/1275)|Temporarily disable more CAST tests for Spark 3.1.0| +|[#1264](https://github.com/NVIDIA/spark-rapids/pull/1264)|Fix race condition in batch creation| +|[#1260](https://github.com/NVIDIA/spark-rapids/pull/1260)|Update UCX license info in NOTIFY-binary for 1.9 and RAPIDS plugin copyright dates| +|[#1247](https://github.com/NVIDIA/spark-rapids/pull/1247)|Ensure column names are valid when writing benchmark query results to file| +|[#1240](https://github.com/NVIDIA/spark-rapids/pull/1240)|Fix loading from 
ORC file with no column names| +|[#1242](https://github.com/NVIDIA/spark-rapids/pull/1242)|Remove compatibility documentation about unsupported INT96| +|[#1192](https://github.com/NVIDIA/spark-rapids/pull/1192)|[REVIEW] Support GpuFilter and GpuCoalesceBatches for decimal data| +|[#1170](https://github.com/NVIDIA/spark-rapids/pull/1170)|Add nested type support to MetaUtils| +|[#1194](https://github.com/NVIDIA/spark-rapids/pull/1194)|Drop redundant total time metric from scan| +|[#1248](https://github.com/NVIDIA/spark-rapids/pull/1248)|At BatchedTableCompressor.finish synchronize to allow for "right-size…| +|[#1169](https://github.com/NVIDIA/spark-rapids/pull/1169)|Use CUDF's "UNBOUNDED" window boundaries for time-range queries.| +|[#1204](https://github.com/NVIDIA/spark-rapids/pull/1204)|Avoid empty batches on columnar to row conversion| +|[#1133](https://github.com/NVIDIA/spark-rapids/pull/1133)|Refactor batch coalesce to be based solely on batch data size| +|[#1237](https://github.com/NVIDIA/spark-rapids/pull/1237)|In transport, limit pending transfer requests to fit within a bounce| +|[#1232](https://github.com/NVIDIA/spark-rapids/pull/1232)|Move SortOrder creation to shims| +|[#1068](https://github.com/NVIDIA/spark-rapids/pull/1068)|Write int96 to parquet| +|[#1193](https://github.com/NVIDIA/spark-rapids/pull/1193)|Verify shuffle of decimal columns| +|[#1180](https://github.com/NVIDIA/spark-rapids/pull/1180)|Remove batches if they are received after the iterator detects that t…| +|[#1173](https://github.com/NVIDIA/spark-rapids/pull/1173)|Support relational operators for decimal type| +|[#1220](https://github.com/NVIDIA/spark-rapids/pull/1220)|Support replacing ORC format when Hive is configured| +|[#1219](https://github.com/NVIDIA/spark-rapids/pull/1219)|Upgrade to jucx 1.9.0| +|[#1081](https://github.com/NVIDIA/spark-rapids/pull/1081)|Add option to upload benchmark summary JSON file| +|[#1217](https://github.com/NVIDIA/spark-rapids/pull/1217)|Aggregate reductions in Complete mode should use updateExpressions| +|[#1218](https://github.com/NVIDIA/spark-rapids/pull/1218)|Remove obsolete HiveStringType usage| +|[#1214](https://github.com/NVIDIA/spark-rapids/pull/1214)|changelog update 2020-11-30. 
Trigger automerge check [skip ci]| +|[#1210](https://github.com/NVIDIA/spark-rapids/pull/1210)|Support auto-merge for branch-0.4 [skip ci]| +|[#1202](https://github.com/NVIDIA/spark-rapids/pull/1202)|Fix a bug with the support for java.lang.StringBuilder.append.| +|[#1213](https://github.com/NVIDIA/spark-rapids/pull/1213)|Skip casting StringType to TimestampType for Spark 310| +|[#1201](https://github.com/NVIDIA/spark-rapids/pull/1201)|Replace only window expressions on databricks.| +|[#1208](https://github.com/NVIDIA/spark-rapids/pull/1208)|[BUG] Fix GHSL2020-239 [skip ci]| +|[#1205](https://github.com/NVIDIA/spark-rapids/pull/1205)|Fix missing input bytes read metric for Parquet| +|[#1206](https://github.com/NVIDIA/spark-rapids/pull/1206)|Update Spark 3.1 shim for ShuffleOrigin shuffle parameter| +|[#1196](https://github.com/NVIDIA/spark-rapids/pull/1196)|Rename ShuffleCoalesceExec to GpuShuffleCoalesceExec| +|[#1191](https://github.com/NVIDIA/spark-rapids/pull/1191)|Skip window array tests for databricks.| +|[#1183](https://github.com/NVIDIA/spark-rapids/pull/1183)|Support for CalendarIntervalType and NullType| +|[#1150](https://github.com/NVIDIA/spark-rapids/pull/1150)|udf spec| +|[#1188](https://github.com/NVIDIA/spark-rapids/pull/1188)|Add in tests for parquet nested pruning support| +|[#1189](https://github.com/NVIDIA/spark-rapids/pull/1189)|Enable NullType for First and Last in 3.0.1+| +|[#1181](https://github.com/NVIDIA/spark-rapids/pull/1181)|Fix resource leaks in unit tests| +|[#1186](https://github.com/NVIDIA/spark-rapids/pull/1186)|Fix compilation and scaladoc warnings| +|[#1187](https://github.com/NVIDIA/spark-rapids/pull/1187)|Updated documentation for distinct count compatibility| +|[#1182](https://github.com/NVIDIA/spark-rapids/pull/1182)|Close buffer catalog on device manager shutdown| +|[#1137](https://github.com/NVIDIA/spark-rapids/pull/1137)|Let GpuWindowInPandas declare ArrayType supported.| +|[#1176](https://github.com/NVIDIA/spark-rapids/pull/1176)|Add in support for null type| +|[#1174](https://github.com/NVIDIA/spark-rapids/pull/1174)|Fix race condition in SerializeConcatHostBuffersDeserializeBatch| +|[#1175](https://github.com/NVIDIA/spark-rapids/pull/1175)|Fix leaks seen in shuffle tests| +|[#1138](https://github.com/NVIDIA/spark-rapids/pull/1138)|[REVIEW] Support decimal type for GpuProjectExec| +|[#1162](https://github.com/NVIDIA/spark-rapids/pull/1162)|Set job descriptions in benchmark runner| +|[#1172](https://github.com/NVIDIA/spark-rapids/pull/1172)|Revert "Fix race condition (#1165)"| +|[#1060](https://github.com/NVIDIA/spark-rapids/pull/1060)|Show partition metrics for custom shuffler reader| +|[#1152](https://github.com/NVIDIA/spark-rapids/pull/1152)|Add spark301db shim layer for WindowInPandas.| +|[#1167](https://github.com/NVIDIA/spark-rapids/pull/1167)|Nulls out the dataframe if --gc-between-runs is set| +|[#1165](https://github.com/NVIDIA/spark-rapids/pull/1165)|Fix race condition in SerializeConcatHostBuffersDeserializeBatch| +|[#1163](https://github.com/NVIDIA/spark-rapids/pull/1163)|Add in support for GetStructField| +|[#1166](https://github.com/NVIDIA/spark-rapids/pull/1166)|Fix the cast tests for 3.1.0+| +|[#1159](https://github.com/NVIDIA/spark-rapids/pull/1159)|fix bug where 'now' had same value as 'today' for timestamps| +|[#1161](https://github.com/NVIDIA/spark-rapids/pull/1161)|Fix nightly build pipeline failure.| +|[#1160](https://github.com/NVIDIA/spark-rapids/pull/1160)|Fix some performance problems with columnar to columnar conversion| 
+|[#1105](https://github.com/NVIDIA/spark-rapids/pull/1105)|[REVIEW] Change ColumnViewAccess usage to work with ColumnView| +|[#1148](https://github.com/NVIDIA/spark-rapids/pull/1148)|Add in tests for Maps and extend map support where possible| +|[#1154](https://github.com/NVIDIA/spark-rapids/pull/1154)|Mark test as xfail until we can get a fix in| +|[#1113](https://github.com/NVIDIA/spark-rapids/pull/1113)|Support unix_timestamp on GPU for subset of formats| +|[#1156](https://github.com/NVIDIA/spark-rapids/pull/1156)|Fix warning introduced in iterator suite| +|[#1095](https://github.com/NVIDIA/spark-rapids/pull/1095)|Dependency info| +|[#1145](https://github.com/NVIDIA/spark-rapids/pull/1145)|Remove support for databricks 7.0 runtime - shim spark300db| +|[#1147](https://github.com/NVIDIA/spark-rapids/pull/1147)|Change the assert to require for handling TIMESTAMP_MILLIS in isDateTimeRebaseNeeded | +|[#1132](https://github.com/NVIDIA/spark-rapids/pull/1132)|Add in basic support to read structs from parquet| +|[#1121](https://github.com/NVIDIA/spark-rapids/pull/1121)|Shuffle/better error handling| +|[#1134](https://github.com/NVIDIA/spark-rapids/pull/1134)|Support saveAsTable for writing orc and parquet| +|[#1124](https://github.com/NVIDIA/spark-rapids/pull/1124)|Add shim layers for GpuWindowInPandasExec.| +|[#1131](https://github.com/NVIDIA/spark-rapids/pull/1131)|Add in some basic support for Structs| +|[#1127](https://github.com/NVIDIA/spark-rapids/pull/1127)|Add in basic support for reading lists from parquet| +|[#1129](https://github.com/NVIDIA/spark-rapids/pull/1129)|Fix resource leaks with new shuffle optimization| +|[#1116](https://github.com/NVIDIA/spark-rapids/pull/1116)|Optimize normal shuffle by coalescing smaller batches on host| +|[#1102](https://github.com/NVIDIA/spark-rapids/pull/1102)|Auto-register UDF extention when main plugin is set| +|[#1108](https://github.com/NVIDIA/spark-rapids/pull/1108)|Remove integration test pipelines on NGCC| +|[#1123](https://github.com/NVIDIA/spark-rapids/pull/1123)|Mark Pandas udf over window tests as xfail on databricks until they can be fixed| +|[#1120](https://github.com/NVIDIA/spark-rapids/pull/1120)|Add in support for filtering ArrayType| +|[#1080](https://github.com/NVIDIA/spark-rapids/pull/1080)|Support for CalendarIntervalType and NullType for ParquetCachedSerializer| +|[#994](https://github.com/NVIDIA/spark-rapids/pull/994)|Packs bounce buffers for highly partitioned shuffles| +|[#1112](https://github.com/NVIDIA/spark-rapids/pull/1112)|Remove bad config from pytest setup| +|[#1107](https://github.com/NVIDIA/spark-rapids/pull/1107)|closeOnExcept -> withResources in MetaUtils| +|[#1104](https://github.com/NVIDIA/spark-rapids/pull/1104)|Support lists to/from the GPU| +|[#1106](https://github.com/NVIDIA/spark-rapids/pull/1106)|Improve mechanism for expected exceptions in tests| +|[#1069](https://github.com/NVIDIA/spark-rapids/pull/1069)|Accelerate the data transfer between JVM and Python for the plan 'GpuWindowInPandasExec'| +|[#1099](https://github.com/NVIDIA/spark-rapids/pull/1099)|Update how we deal with type checking| +|[#1077](https://github.com/NVIDIA/spark-rapids/pull/1077)|Improve AQE transitions for shuffle and coalesce batches| +|[#1097](https://github.com/NVIDIA/spark-rapids/pull/1097)|Cleanup some instances of excess closure serialization| +|[#1090](https://github.com/NVIDIA/spark-rapids/pull/1090)|Fix the integration build| +|[#1086](https://github.com/NVIDIA/spark-rapids/pull/1086)|Speed up test performance using 
pytest-xdist| +|[#1084](https://github.com/NVIDIA/spark-rapids/pull/1084)|Avoid issues where more scalars that expected show up in an expression| +|[#1076](https://github.com/NVIDIA/spark-rapids/pull/1076)|[FEA] Support Databricks 7.3 LTS Runtime| +|[#1083](https://github.com/NVIDIA/spark-rapids/pull/1083)|Revert "Get cudf/spark dependency from the correct .m2 dir"| +|[#1062](https://github.com/NVIDIA/spark-rapids/pull/1062)|Get cudf/spark dependency from the correct .m2 dir| +|[#1078](https://github.com/NVIDIA/spark-rapids/pull/1078)|Another round of fixes for mapping of DataType to DType| +|[#1066](https://github.com/NVIDIA/spark-rapids/pull/1066)|More fixes for conversion to ColumnarBatch| +|[#1029](https://github.com/NVIDIA/spark-rapids/pull/1029)|BenchmarkRunner should produce JSON summary file even when queries fail| +|[#1055](https://github.com/NVIDIA/spark-rapids/pull/1055)|Fix build warnings| +|[#1064](https://github.com/NVIDIA/spark-rapids/pull/1064)|Use array instead of List for from(Table, DataType)| +|[#1057](https://github.com/NVIDIA/spark-rapids/pull/1057)|Fix empty table broadcast requiring a GPU on driver node| +|[#1047](https://github.com/NVIDIA/spark-rapids/pull/1047)|Sanity checks for cudf jar mismatch| +|[#1044](https://github.com/NVIDIA/spark-rapids/pull/1044)|Accelerated row to columnar and columnar to row transitions| +|[#1056](https://github.com/NVIDIA/spark-rapids/pull/1056)|Add query number to Spark app name when running benchmarks| +|[#1054](https://github.com/NVIDIA/spark-rapids/pull/1054)|Log total RMM allocated on GPU OOM| +|[#1053](https://github.com/NVIDIA/spark-rapids/pull/1053)|Remove isGpuBroadcastNestedLoopJoin from shims| +|[#1052](https://github.com/NVIDIA/spark-rapids/pull/1052)|Allow for GPUCoalesceBatch to deal with Map| +|[#1051](https://github.com/NVIDIA/spark-rapids/pull/1051)|Add simple retry for URM dependencies [skip ci]| +|[#1046](https://github.com/NVIDIA/spark-rapids/pull/1046)|Fix broken links| +|[#1017](https://github.com/NVIDIA/spark-rapids/pull/1017)|Log whether PTDS is enabled| +|[#1040](https://github.com/NVIDIA/spark-rapids/pull/1040)|Update to cudf 0.17-SNAPSHOT and fix tests| +|[#1042](https://github.com/NVIDIA/spark-rapids/pull/1042)|Fix inconsistencies in AQE support for broadcast joins| +|[#1037](https://github.com/NVIDIA/spark-rapids/pull/1037)|Add in support for the SQL functions Least and Greatest| +|[#1036](https://github.com/NVIDIA/spark-rapids/pull/1036)|Increase number of retries when waiting for databricks cluster| +|[#1034](https://github.com/NVIDIA/spark-rapids/pull/1034)|[BUG] To honor spark.rapids.memory.gpu.pool=NONE| +|[#854](https://github.com/NVIDIA/spark-rapids/pull/854)|Arbitrary function call in UDF| +|[#1028](https://github.com/NVIDIA/spark-rapids/pull/1028)|Update to cudf-0.16| +|[#1023](https://github.com/NVIDIA/spark-rapids/pull/1023)|Add --gc-between-run flag for TPC* benchmarks.| +|[#1001](https://github.com/NVIDIA/spark-rapids/pull/1001)|ColumnarBatch to CachedBatch and back| +|[#990](https://github.com/NVIDIA/spark-rapids/pull/990)|Parquet coalesce file reader for local filesystems| +|[#1014](https://github.com/NVIDIA/spark-rapids/pull/1014)|Add --append-dat flag for TPC-DS benchmark| +|[#991](https://github.com/NVIDIA/spark-rapids/pull/991)|Updated GCP Dataproc Mortgage-ETL-GPU.ipynb| +|[#886](https://github.com/NVIDIA/spark-rapids/pull/886)|Spark BinaryType and cast to BinaryType| +|[#1016](https://github.com/NVIDIA/spark-rapids/pull/1016)|Change Hash Aggregate to allow pass-through on MapType| 
+|[#984](https://github.com/NVIDIA/spark-rapids/pull/984)|Add support for MapType in selected operators | +|[#1012](https://github.com/NVIDIA/spark-rapids/pull/1012)|Update for new position parameter in Spark 3.1.0 RegExpReplace| +|[#995](https://github.com/NVIDIA/spark-rapids/pull/995)|Add shim for EMR 3.0.1 and EMR 3.0.1-SNAPSHOT| +|[#998](https://github.com/NVIDIA/spark-rapids/pull/998)|Update benchmark automation script| +|[#1000](https://github.com/NVIDIA/spark-rapids/pull/1000)|Always use RAPIDS shuffle when running TPCH and Mortgage tests| +|[#981](https://github.com/NVIDIA/spark-rapids/pull/981)|Change databricks build to dynamically create a cluster| +|[#986](https://github.com/NVIDIA/spark-rapids/pull/986)|Fix missing dataSize metric when using RAPIDS shuffle| +|[#914](https://github.com/NVIDIA/spark-rapids/pull/914)|Write InternalRow to CachedBatch| +|[#934](https://github.com/NVIDIA/spark-rapids/pull/934)|Iterator to make it easier to work with a window of blocks in the RAPIDS shuffle| +|[#992](https://github.com/NVIDIA/spark-rapids/pull/992)|Skip post-clean if aborted before the image build stage in pre-merge [skip ci]| +|[#988](https://github.com/NVIDIA/spark-rapids/pull/988)|Change in Spark caused the 3.1.0 CI to fail| +|[#983](https://github.com/NVIDIA/spark-rapids/pull/983)|clean jenkins file for premerge on NGCC| +|[#964](https://github.com/NVIDIA/spark-rapids/pull/964)|Refactor TPC benchmarks to reduce duplicate code| +|[#978](https://github.com/NVIDIA/spark-rapids/pull/978)|Enable scalastyle checks for udf-compiler module| +|[#949](https://github.com/NVIDIA/spark-rapids/pull/949)|Fix GpuWindowExec to work with a CPU SortExec| +|[#973](https://github.com/NVIDIA/spark-rapids/pull/973)|Stop reporting totalTime metric for GpuShuffleExchangeExec| +|[#968](https://github.com/NVIDIA/spark-rapids/pull/968)|XFail pos_explode tests until final fix can be put in| +|[#970](https://github.com/NVIDIA/spark-rapids/pull/970)|Add legacy config to clear active Spark 3.1.0 session in tests| +|[#918](https://github.com/NVIDIA/spark-rapids/pull/918)|Benchmark runner script| +|[#915](https://github.com/NVIDIA/spark-rapids/pull/915)|Add option to control number of partitions when converting from CSV to Parquet| +|[#944](https://github.com/NVIDIA/spark-rapids/pull/944)|Fix some issues with non-determinism| +|[#935](https://github.com/NVIDIA/spark-rapids/pull/935)|Add in support/tests for a window count on a column| +|[#940](https://github.com/NVIDIA/spark-rapids/pull/940)|Fix closeOnExcept suppressed exception handling| +|[#942](https://github.com/NVIDIA/spark-rapids/pull/942)|fix github action env setup [skip ci]| +|[#933](https://github.com/NVIDIA/spark-rapids/pull/933)|Update first/last tests to avoid non-determinisim and ordering differences| +|[#931](https://github.com/NVIDIA/spark-rapids/pull/931)|Fix checking for nullable columns in window range query| +|[#924](https://github.com/NVIDIA/spark-rapids/pull/924)|Benchmark guide update for command-line interface / spark-submit| +|[#926](https://github.com/NVIDIA/spark-rapids/pull/926)|Move pandas_udf functions into the tests functions| +|[#929](https://github.com/NVIDIA/spark-rapids/pull/929)|Pick a default tableId to use that is non 0 so that flatbuffers allow…| +|[#928](https://github.com/NVIDIA/spark-rapids/pull/928)|Fix RapidsBufferStore NPE when no spillable buffers are available| +|[#820](https://github.com/NVIDIA/spark-rapids/pull/820)|Benchmarking guide| +|[#859](https://github.com/NVIDIA/spark-rapids/pull/859)|Compare partitioned 
files in order| +|[#916](https://github.com/NVIDIA/spark-rapids/pull/916)|create new sparkContext explicitly in CPU notebook| +|[#917](https://github.com/NVIDIA/spark-rapids/pull/917)|create new SparkContext in GPU notebook explicitly.| +|[#919](https://github.com/NVIDIA/spark-rapids/pull/919)|Add label benchmark to performance subsection in changelog| +|[#850](https://github.com/NVIDIA/spark-rapids/pull/850)| Add in basic support for lead/lag| +|[#843](https://github.com/NVIDIA/spark-rapids/pull/843)|[REVIEW] Cache plugin to handle reading CachedBatch to an InternalRow| +|[#904](https://github.com/NVIDIA/spark-rapids/pull/904)|Add command-line argument for benchmark result filename| +|[#909](https://github.com/NVIDIA/spark-rapids/pull/909)|GCP preview version image name update| +|[#903](https://github.com/NVIDIA/spark-rapids/pull/903)|update getting-started-gcp.md with new component list| +|[#900](https://github.com/NVIDIA/spark-rapids/pull/900)|Turn off CollectLimitExec replacement by default| +|[#907](https://github.com/NVIDIA/spark-rapids/pull/907)|remove configs from databricks that shouldn't be used by default| +|[#893](https://github.com/NVIDIA/spark-rapids/pull/893)|Fix rounding error when casting timestamp to string for timestamps before 1970| +|[#899](https://github.com/NVIDIA/spark-rapids/pull/899)|Mark reduction corner case tests as xfail on databricks until they can be fixed| +|[#894](https://github.com/NVIDIA/spark-rapids/pull/894)|Replace whole-buffer slicing with direct refcounting| +|[#891](https://github.com/NVIDIA/spark-rapids/pull/891)|Add config to dump heap on GPU OOM| +|[#890](https://github.com/NVIDIA/spark-rapids/pull/890)|Clean up CoalesceBatch to use withResource| +|[#892](https://github.com/NVIDIA/spark-rapids/pull/892)|Only manifest the current batch in cached block shuffle read iterator| +|[#871](https://github.com/NVIDIA/spark-rapids/pull/871)|Add support for using the arena allocator| +|[#889](https://github.com/NVIDIA/spark-rapids/pull/889)|Fix crash on scalar only orderby| +|[#879](https://github.com/NVIDIA/spark-rapids/pull/879)|Update SpillableColumnarBatch to remove buffer from catalog on close| +|[#888](https://github.com/NVIDIA/spark-rapids/pull/888)|Shrink detect scope to compile only [skip ci]| +|[#885](https://github.com/NVIDIA/spark-rapids/pull/885)|[BUG] fix IT dockerfile arguments [skip ci]| +|[#883](https://github.com/NVIDIA/spark-rapids/pull/883)|[BUG] fix IT dockerfile args ordering [skip ci]| +|[#875](https://github.com/NVIDIA/spark-rapids/pull/875)|fix the non-consistency for `spark.rapids.sql.format.parquet.multiThreadedRead` in RapidsConf.scala| +|[#862](https://github.com/NVIDIA/spark-rapids/pull/862)|Migrate nightly&integration pipelines to blossom [skip ci]| +|[#872](https://github.com/NVIDIA/spark-rapids/pull/872)|Ensure that receive-side batches use GpuColumnVectorFromBuffer to avoid| +|[#833](https://github.com/NVIDIA/spark-rapids/pull/833)|Add nvcomp LZ4 codec support| +|[#870](https://github.com/NVIDIA/spark-rapids/pull/870)|Cleaned up tests and documentation for csv timestamp parsing| +|[#823](https://github.com/NVIDIA/spark-rapids/pull/823)|Add command-line interface for TPC-* for use with spark-submit| +|[#856](https://github.com/NVIDIA/spark-rapids/pull/856)|Move GpuWindowInPandasExec in shims layers| +|[#756](https://github.com/NVIDIA/spark-rapids/pull/756)|Add stream-time metric| +|[#832](https://github.com/NVIDIA/spark-rapids/pull/832)|Skip pandas tests if pandas cannot be found| 
+|[#841](https://github.com/NVIDIA/spark-rapids/pull/841)|Fix a hanging issue when processing empty data.| +|[#840](https://github.com/NVIDIA/spark-rapids/pull/840)|[REVIEW] Fixed failing cache tests| +|[#848](https://github.com/NVIDIA/spark-rapids/pull/848)|Update task memory and disk spill metrics when buffer store spills| +|[#851](https://github.com/NVIDIA/spark-rapids/pull/851)|Use contiguous table when deserializing columnar batch| +|[#857](https://github.com/NVIDIA/spark-rapids/pull/857)|fix pvc scheduling issue| +|[#853](https://github.com/NVIDIA/spark-rapids/pull/853)|Remove nodeAffinity from premerge pipeline| +|[#796](https://github.com/NVIDIA/spark-rapids/pull/796)|Record spark plan SQL metrics to JSON when running benchmarks| +|[#781](https://github.com/NVIDIA/spark-rapids/pull/781)|Add AQE unit tests| +|[#824](https://github.com/NVIDIA/spark-rapids/pull/824)|Skip cudf_udf test by default| +|[#839](https://github.com/NVIDIA/spark-rapids/pull/839)|First/Last reduction and cleanup of agg APIs| +|[#827](https://github.com/NVIDIA/spark-rapids/pull/827)|Add Spark 3.0 EMR Shim layer | +|[#816](https://github.com/NVIDIA/spark-rapids/pull/816)|[BUG] fix nightly is timing out| +|[#782](https://github.com/NVIDIA/spark-rapids/pull/782)|Benchmark utility to perform diff of output from benchmark runs, allowing for precision differences| +|[#813](https://github.com/NVIDIA/spark-rapids/pull/813)|Revert "Enable tests in udf_cudf_test.py"| +|[#788](https://github.com/NVIDIA/spark-rapids/pull/788)|[FEA] Persist workspace data on PVC for premerge| +|[#805](https://github.com/NVIDIA/spark-rapids/pull/805)|[FEA] nightly build trigger both IT on spark 300 and 301| +|[#797](https://github.com/NVIDIA/spark-rapids/pull/797)|Allow host spill store to fit a buffer larger than configured max size| +|[#807](https://github.com/NVIDIA/spark-rapids/pull/807)|Deploy integration-tests javadoc and sources| +|[#777](https://github.com/NVIDIA/spark-rapids/pull/777)|Enable tests in udf_cudf_test.py| +|[#790](https://github.com/NVIDIA/spark-rapids/pull/790)|CI: Update cudf python to 0.16 nightly| +|[#772](https://github.com/NVIDIA/spark-rapids/pull/772)|Add support for empty array construction.| +|[#783](https://github.com/NVIDIA/spark-rapids/pull/783)|Improved GpuArrowEvalPythonExec| +|[#771](https://github.com/NVIDIA/spark-rapids/pull/771)|Various improvements to benchmarks| +|[#763](https://github.com/NVIDIA/spark-rapids/pull/763)|[REVIEW] Allow CoalesceBatch to spill data that is not in active use| +|[#727](https://github.com/NVIDIA/spark-rapids/pull/727)|Update cudf dependency to 0.16-SNAPSHOT| +|[#726](https://github.com/NVIDIA/spark-rapids/pull/726)|parquet writer support for TIMESTAMP_MILLIS| +|[#674](https://github.com/NVIDIA/spark-rapids/pull/674)|Unit test for GPU exchange re-use with AQE| +|[#723](https://github.com/NVIDIA/spark-rapids/pull/723)|Update code coverage to find source files in new places| +|[#766](https://github.com/NVIDIA/spark-rapids/pull/766)|Update the integration Dockerfile to reduce the image size| +|[#762](https://github.com/NVIDIA/spark-rapids/pull/762)|Fixing conflicts in branch-0.3| +|[#738](https://github.com/NVIDIA/spark-rapids/pull/738)|[auto-merge] branch-0.2 to branch-0.3 - resolve conflict| +|[#722](https://github.com/NVIDIA/spark-rapids/pull/722)|Initial code changes to support spilling outside of shuffle| +|[#693](https://github.com/NVIDIA/spark-rapids/pull/693)|Update jenkins files for 0.3| +|[#692](https://github.com/NVIDIA/spark-rapids/pull/692)|Merge shims dependency to 
spark-3.0.1 into branch-0.3| +|[#690](https://github.com/NVIDIA/spark-rapids/pull/690)|Update the version to 0.3.0-SNAPSHOT| + +## Release 0.2 + +### Features +||| +|:---|:---| +|[#696](https://github.com/NVIDIA/spark-rapids/issues/696)|[FEA] run integration tests against SPARK-3.0.1| +|[#455](https://github.com/NVIDIA/spark-rapids/issues/455)|[FEA] Support UCX shuffle with optimized AQE| +|[#510](https://github.com/NVIDIA/spark-rapids/issues/510)|[FEA] Investigate libcudf features needed to support struct schema pruning during loads| +|[#541](https://github.com/NVIDIA/spark-rapids/issues/541)|[FEA] Scala UDF:Support for null Value operands| +|[#542](https://github.com/NVIDIA/spark-rapids/issues/542)|[FEA] Scala UDF: Support for Date and Time | +|[#499](https://github.com/NVIDIA/spark-rapids/issues/499)|[FEA] disable any kind of warnings about ExecutedCommandExec not being on the GPU| +|[#540](https://github.com/NVIDIA/spark-rapids/issues/540)|[FEA] Scala UDF: Support for String replaceFirst()| +|[#340](https://github.com/NVIDIA/spark-rapids/issues/340)|[FEA] widen the rendered Jekyll pages| +|[#602](https://github.com/NVIDIA/spark-rapids/issues/602)|[FEA] don't release with any -SNAPSHOT dependencies| +|[#579](https://github.com/NVIDIA/spark-rapids/issues/579)|[FEA] Auto-merge between branches| +|[#515](https://github.com/NVIDIA/spark-rapids/issues/515)|[FEA] Write tests for AQE skewed join optimization| +|[#452](https://github.com/NVIDIA/spark-rapids/issues/452)|[FEA] Update HashSortOptimizerSuite to work with AQE| +|[#454](https://github.com/NVIDIA/spark-rapids/issues/454)|[FEA] Update GpuCoalesceBatchesSuite to work with AQE enabled| +|[#354](https://github.com/NVIDIA/spark-rapids/issues/354)|[FEA]Spark 3.1 FileSourceScanExec adds parameter optionalNumCoalescedBuckets| +|[#566](https://github.com/NVIDIA/spark-rapids/issues/566)|[FEA] Add support for StringSplit with an array index.| +|[#524](https://github.com/NVIDIA/spark-rapids/issues/524)|[FEA] Add GPU specific metrics to GpuFileSourceScanExec| +|[#494](https://github.com/NVIDIA/spark-rapids/issues/494)|[FEA] Add some AQE-specific tests to the PySpark test suite| +|[#146](https://github.com/NVIDIA/spark-rapids/issues/146)|[FEA] Python tests should support running with Adaptive Query Execution enabled| +|[#465](https://github.com/NVIDIA/spark-rapids/issues/465)|[FEA] Audit: Update script to audit multiple versions of Spark | +|[#488](https://github.com/NVIDIA/spark-rapids/issues/488)|[FEA] Ability to limit total GPU memory used| +|[#70](https://github.com/NVIDIA/spark-rapids/issues/70)|[FEA] Support StringSplit| +|[#403](https://github.com/NVIDIA/spark-rapids/issues/403)|[FEA] Add in support for GetArrayItem| +|[#493](https://github.com/NVIDIA/spark-rapids/issues/493)|[FEA] Implement shuffle optimization when AQE is enabled| +|[#500](https://github.com/NVIDIA/spark-rapids/issues/500)|[FEA] Add maven profiles for testing with AQE on or off| +|[#471](https://github.com/NVIDIA/spark-rapids/issues/471)|[FEA] create a formal process for updating the github-pages branch| +|[#233](https://github.com/NVIDIA/spark-rapids/issues/233)|[FEA] Audit DataWritingCommandExec | +|[#240](https://github.com/NVIDIA/spark-rapids/issues/240)|[FEA] Audit Api validation script follow on - Optimize StringToTypeTag | +|[#388](https://github.com/NVIDIA/spark-rapids/issues/388)|[FEA] Audit WindowExec| +|[#425](https://github.com/NVIDIA/spark-rapids/issues/425)|[FEA] Add tests for configs in BatchScan Readers| 
+|[#453](https://github.com/NVIDIA/spark-rapids/issues/453)|[FEA] Update HashAggregatesSuite to work with AQE| +|[#184](https://github.com/NVIDIA/spark-rapids/issues/184)|[FEA] Enable NoScalaDoc scalastyle rule| +|[#438](https://github.com/NVIDIA/spark-rapids/issues/438)|[FEA] Enable StringLPad| +|[#232](https://github.com/NVIDIA/spark-rapids/issues/232)|[FEA] Audit SortExec | +|[#236](https://github.com/NVIDIA/spark-rapids/issues/236)|[FEA] Audit ShuffleExchangeExec | +|[#355](https://github.com/NVIDIA/spark-rapids/issues/355)|[FEA] Support Multiple Spark versions in the same jar| +|[#385](https://github.com/NVIDIA/spark-rapids/issues/385)|[FEA] Support RangeExec on the GPU| +|[#317](https://github.com/NVIDIA/spark-rapids/issues/317)|[FEA] Write test wrapper to run SQL queries via pyspark| +|[#235](https://github.com/NVIDIA/spark-rapids/issues/235)|[FEA] Audit BroadcastExchangeExec| +|[#234](https://github.com/NVIDIA/spark-rapids/issues/234)|[FEA] Audit BatchScanExec| +|[#238](https://github.com/NVIDIA/spark-rapids/issues/238)|[FEA] Audit ShuffledHashJoinExec | +|[#237](https://github.com/NVIDIA/spark-rapids/issues/237)|[FEA] Audit BroadcastHashJoinExec | +|[#316](https://github.com/NVIDIA/spark-rapids/issues/316)|[FEA] Add some basic Dataframe tests for CoalesceExec| +|[#145](https://github.com/NVIDIA/spark-rapids/issues/145)|[FEA] Scala tests should support running with Adaptive Query Execution enabled| +|[#231](https://github.com/NVIDIA/spark-rapids/issues/231)|[FEA] Audit ProjectExec | +|[#229](https://github.com/NVIDIA/spark-rapids/issues/229)|[FEA] Audit FileSourceScanExec | + +### Performance +||| +|:---|:---| +|[#326](https://github.com/NVIDIA/spark-rapids/issues/326)|[DISCUSS] Shuffle read-side error handling| +|[#601](https://github.com/NVIDIA/spark-rapids/issues/601)|[FEA] Optimize unnecessary sorts when replacing SortAggregate| +|[#333](https://github.com/NVIDIA/spark-rapids/issues/333)|[FEA] Better handling of reading lots of small Parquet files| +|[#511](https://github.com/NVIDIA/spark-rapids/issues/511)|[FEA] Connect shuffle table compression to shuffle exec metrics| +|[#15](https://github.com/NVIDIA/spark-rapids/issues/15)|[FEA] Multiple threads sharing the same GPU| +|[#272](https://github.com/NVIDIA/spark-rapids/issues/272)|[DOC] Getting started guide for UCX shuffle| + +### Bugs Fixed +||| +|:---|:---| +|[#780](https://github.com/NVIDIA/spark-rapids/issues/780)|[BUG] Inner Join dropping data with bucketed Table input| +|[#569](https://github.com/NVIDIA/spark-rapids/issues/569)|[BUG] left_semi_join operation is abnormal and serious time-consuming| +|[#744](https://github.com/NVIDIA/spark-rapids/issues/744)|[BUG] TPC-DS query 6 now produces incorrect results.| +|[#718](https://github.com/NVIDIA/spark-rapids/issues/718)|[BUG] GpuBroadcastHashJoinExec ArrayIndexOutOfBoundsException| +|[#698](https://github.com/NVIDIA/spark-rapids/issues/698)|[BUG] batch coalesce can fail to appear between columnar shuffle and subsequent columnar operation| +|[#658](https://github.com/NVIDIA/spark-rapids/issues/658)|[BUG] GpuCoalesceBatches collectTime metric can be underreported| +|[#59](https://github.com/NVIDIA/spark-rapids/issues/59)|[BUG] enable tests for string literals in a select| +|[#486](https://github.com/NVIDIA/spark-rapids/issues/486)|[BUG] GpuWindowExec does not implement requiredChildOrdering| +|[#631](https://github.com/NVIDIA/spark-rapids/issues/631)|[BUG] Rows are dropped when AQE is enabled in some cases| +|[#671](https://github.com/NVIDIA/spark-rapids/issues/671)|[BUG] 
Databricks hash_aggregate_test fails trying to canonicalize a WrappedAggFunction| +|[#218](https://github.com/NVIDIA/spark-rapids/issues/218)|[BUG] Window function COUNT(x) includes null-values, when it shouldn't| +|[#153](https://github.com/NVIDIA/spark-rapids/issues/153)|[BUG] Incorrect output from partial-only hash aggregates with multiple distincts and non-distinct functions| +|[#656](https://github.com/NVIDIA/spark-rapids/issues/656)|[BUG] integration tests produce hive metadata files| +|[#607](https://github.com/NVIDIA/spark-rapids/issues/607)|[BUG] Fix misleading "cannot run on GPU" warnings when AQE is enabled| +|[#630](https://github.com/NVIDIA/spark-rapids/issues/630)|[BUG] GpuCustomShuffleReader metrics always show zero rows/batches output| +|[#643](https://github.com/NVIDIA/spark-rapids/issues/643)|[BUG] race condition while registering a buffer and spilling at the same time| +|[#606](https://github.com/NVIDIA/spark-rapids/issues/606)|[BUG] Multiple scans for same data source with TPC-DS query59 with delta format| +|[#626](https://github.com/NVIDIA/spark-rapids/issues/626)|[BUG] parquet_test showing leaked memory buffer| +|[#155](https://github.com/NVIDIA/spark-rapids/issues/155)|[BUG] Incorrect output from averages with filters in partial only mode| +|[#277](https://github.com/NVIDIA/spark-rapids/issues/277)|[BUG] HashAggregateSuite failure when AQE is enabled| +|[#276](https://github.com/NVIDIA/spark-rapids/issues/276)|[BUG] GpuCoalesceBatchSuite failure when AQE is enabled| +|[#598](https://github.com/NVIDIA/spark-rapids/issues/598)|[BUG] Non-deterministic output from MapOutputTracker.getStatistics() with AQE on GPU| +|[#192](https://github.com/NVIDIA/spark-rapids/issues/192)|[BUG] test_read_merge_schema fails on Databricks| +|[#341](https://github.com/NVIDIA/spark-rapids/issues/341)|[BUG] Document compression formats for readers/writers| +|[#587](https://github.com/NVIDIA/spark-rapids/issues/587)|[BUG] Spark3.1 changed FileScan which means or GpuScans need to be added to shim layer| +|[#362](https://github.com/NVIDIA/spark-rapids/issues/362)|[BUG] Implement getReaderForRange in the RapidsShuffleManager| +|[#528](https://github.com/NVIDIA/spark-rapids/issues/528)|[BUG] HashAggregateSuite "Avg Distinct with filter" no longer valid when testing against Spark 3.1.0| +|[#416](https://github.com/NVIDIA/spark-rapids/issues/416)|[BUG] Fix Spark 3.1.0 integration tests| +|[#556](https://github.com/NVIDIA/spark-rapids/issues/556)|[BUG] NPE when removing shuffle| +|[#553](https://github.com/NVIDIA/spark-rapids/issues/553)|[BUG] GpuColumnVector build warnings from raw type access| +|[#492](https://github.com/NVIDIA/spark-rapids/issues/492)|[BUG] Re-enable AQE integration tests| +|[#275](https://github.com/NVIDIA/spark-rapids/issues/275)|[BUG] TpchLike query 2 fails when AQE is enabled| +|[#508](https://github.com/NVIDIA/spark-rapids/issues/508)|[BUG] GpuUnion publishes metrics on the UI that are all 0| +|[#269](https://github.com/NVIDIA/spark-rapids/issues/269)|Needed to add `--conf spark.driver.extraClassPath=` | +|[#473](https://github.com/NVIDIA/spark-rapids/issues/473)|[BUG] PartMerge:countDistinct:sum fails sporadically| +|[#531](https://github.com/NVIDIA/spark-rapids/issues/531)|[BUG] Temporary RMM workaround needs to be removed| +|[#532](https://github.com/NVIDIA/spark-rapids/issues/532)|[BUG] NPE when enabling shuffle manager| +|[#525](https://github.com/NVIDIA/spark-rapids/issues/525)|[BUG] GpuFilterExec reports incorrect nullability of output in some cases| 
+|[#483](https://github.com/NVIDIA/spark-rapids/issues/483)|[BUG] Multiple scans for the same parquet data source| +|[#382](https://github.com/NVIDIA/spark-rapids/issues/382)|[BUG] Spark3.1 StringFallbackSuite regexp_replace null cpu fall back test fails.| +|[#489](https://github.com/NVIDIA/spark-rapids/issues/489)|[FEA] Fix Spark 3.1 GpuHashJoin since it now requires CodegenSupport| +|[#441](https://github.com/NVIDIA/spark-rapids/issues/441)|[BUG] test_broadcast_nested_loop_join_special_case fails on databricks| +|[#347](https://github.com/NVIDIA/spark-rapids/issues/347)|[BUG] Failed to read Parquet file generated by GPU-enabled Spark.| +|[#433](https://github.com/NVIDIA/spark-rapids/issues/433)|`InSet` operator produces an error for Strings| +|[#144](https://github.com/NVIDIA/spark-rapids/issues/144)|[BUG] spark.sql.legacy.parquet.datetimeRebaseModeInWrite is ignored| +|[#323](https://github.com/NVIDIA/spark-rapids/issues/323)|[BUG] GpuBroadcastNestedLoopJoinExec can fail if there are no columns| +|[#356](https://github.com/NVIDIA/spark-rapids/issues/356)|[BUG] Integration cache test for BroadcastNestedLoopJoin failure| +|[#280](https://github.com/NVIDIA/spark-rapids/issues/280)|[BUG] Full Outer Join does not work on nullable keys| +|[#149](https://github.com/NVIDIA/spark-rapids/issues/149)|[BUG] Spark driver fails to load native libs when running on node without CUDA| + +### PRs +||| +|:---|:---| +|[#826](https://github.com/NVIDIA/spark-rapids/pull/826)|Fix link to cudf-0.15-cuda11.jar| +|[#815](https://github.com/NVIDIA/spark-rapids/pull/815)|Update documentation for Scala UDFs in 0.2 since you need two things| +|[#802](https://github.com/NVIDIA/spark-rapids/pull/802)|Update 0.2 CHANGELOG| +|[#793](https://github.com/NVIDIA/spark-rapids/pull/793)|Update Jenkins scripts for release| +|[#798](https://github.com/NVIDIA/spark-rapids/pull/798)|Fix shims provider override config not being seen by executors| +|[#785](https://github.com/NVIDIA/spark-rapids/pull/785)|Make shuffle run on CPU if we do a join where we read from bucketed table| +|[#765](https://github.com/NVIDIA/spark-rapids/pull/765)|Add config to override shims provider class| +|[#759](https://github.com/NVIDIA/spark-rapids/pull/759)|Add CHANGELOG for release 0.2| +|[#758](https://github.com/NVIDIA/spark-rapids/pull/758)|Skip the udf test fails periodically.| +|[#752](https://github.com/NVIDIA/spark-rapids/pull/752)|Fix snapshot plugin jar version in docs| +|[#751](https://github.com/NVIDIA/spark-rapids/pull/751)|Correct the channel for cudf installation| +|[#754](https://github.com/NVIDIA/spark-rapids/pull/754)|Filter nulls from joins where possible to improve performance| +|[#732](https://github.com/NVIDIA/spark-rapids/pull/732)|Add a timeout for RapidsShuffleIterator to prevent jobs to hang infin…| +|[#637](https://github.com/NVIDIA/spark-rapids/pull/637)|Documentation changes for 0.2 release | +|[#747](https://github.com/NVIDIA/spark-rapids/pull/747)|Disable udf tests that fail periodically| +|[#745](https://github.com/NVIDIA/spark-rapids/pull/745)|Revert Null Join Filter| +|[#741](https://github.com/NVIDIA/spark-rapids/pull/741)|Fix issue with parquet partitioned reads| +|[#733](https://github.com/NVIDIA/spark-rapids/pull/733)|Remove GPU Types from github| +|[#720](https://github.com/NVIDIA/spark-rapids/pull/720)|Stop removing GpuCoalesceBatches from non-AQE queries when AQE is enabled| +|[#729](https://github.com/NVIDIA/spark-rapids/pull/729)|Fix collect time metric in CoalesceBatches| 
+|[#640](https://github.com/NVIDIA/spark-rapids/pull/640)|Support running Pandas UDFs on GPUs in Python processes.| +|[#721](https://github.com/NVIDIA/spark-rapids/pull/721)|Add some more checks to databricks build scripts| +|[#714](https://github.com/NVIDIA/spark-rapids/pull/714)|Move spark 3.0.1-shims out of snapshot-shims| +|[#711](https://github.com/NVIDIA/spark-rapids/pull/711)|fix blossom checkout repo| +|[#709](https://github.com/NVIDIA/spark-rapids/pull/709)|[BUG] fix unexpected indentation issue in blossom yml| +|[#642](https://github.com/NVIDIA/spark-rapids/pull/642)|Init workflow for blossom-ci| +|[#705](https://github.com/NVIDIA/spark-rapids/pull/705)|Enable configuration check for cast string to timestamp| +|[#702](https://github.com/NVIDIA/spark-rapids/pull/702)|Update slack channel for Jenkins builds| +|[#701](https://github.com/NVIDIA/spark-rapids/pull/701)|fix checkout-ref for automerge| +|[#695](https://github.com/NVIDIA/spark-rapids/pull/695)|Fix spark-3.0.1 shim to be released| +|[#668](https://github.com/NVIDIA/spark-rapids/pull/668)|refactor automerge to support merge for protected branch| +|[#687](https://github.com/NVIDIA/spark-rapids/pull/687)|Include the UDF compiler in the dist jar| +|[#689](https://github.com/NVIDIA/spark-rapids/pull/689)|Change shims dependency to spark-3.0.1| +|[#677](https://github.com/NVIDIA/spark-rapids/pull/677)|Use multi-threaded parquet read with small files| +|[#638](https://github.com/NVIDIA/spark-rapids/pull/638)|Add Parquet-based cache serializer| +|[#613](https://github.com/NVIDIA/spark-rapids/pull/613)|Enable UCX + AQE| +|[#684](https://github.com/NVIDIA/spark-rapids/pull/684)|Enable test for literal string values in a select| +|[#686](https://github.com/NVIDIA/spark-rapids/pull/686)|Remove sorts when replacing sort aggregate if possible| +|[#675](https://github.com/NVIDIA/spark-rapids/pull/675)|Added TimeAdd| +|[#645](https://github.com/NVIDIA/spark-rapids/pull/645)|[window] Add GpuWindowExec requiredChildOrdering| +|[#676](https://github.com/NVIDIA/spark-rapids/pull/676)|fixUpJoinConsistency rule now works when AQE is enabled| +|[#683](https://github.com/NVIDIA/spark-rapids/pull/683)|Fix issues with cannonicalization of WrappedAggFunction| +|[#682](https://github.com/NVIDIA/spark-rapids/pull/682)|Fix path to start-slave.sh script in docs| +|[#673](https://github.com/NVIDIA/spark-rapids/pull/673)|Increase build timeouts on nightly and premerge builds| +|[#648](https://github.com/NVIDIA/spark-rapids/pull/648)|add signoff-check use github actions| +|[#593](https://github.com/NVIDIA/spark-rapids/pull/593)|Add support for isNaN and datetime related instructions in UDF compiler| +|[#666](https://github.com/NVIDIA/spark-rapids/pull/666)|[window] Disable GPU for COUNT(exp) queries| +|[#655](https://github.com/NVIDIA/spark-rapids/pull/655)|Implement AQE unit test for InsertAdaptiveSparkPlan| +|[#614](https://github.com/NVIDIA/spark-rapids/pull/614)|Fix for aggregation with multiple distinct and non distinct functions| +|[#657](https://github.com/NVIDIA/spark-rapids/pull/657)|Fix verify build after integration tests are run| +|[#660](https://github.com/NVIDIA/spark-rapids/pull/660)|Add in neverReplaceExec and several rules for it| +|[#639](https://github.com/NVIDIA/spark-rapids/pull/639)|BooleanType test shouldn't xfail| +|[#652](https://github.com/NVIDIA/spark-rapids/pull/652)|Mark UVM config as internal until supported| +|[#653](https://github.com/NVIDIA/spark-rapids/pull/653)|Move to the cudf-0.15 release| 
+|[#647](https://github.com/NVIDIA/spark-rapids/pull/647)|Improve warnings about AQE nodes not supported on GPU| +|[#646](https://github.com/NVIDIA/spark-rapids/pull/646)|Stop reporting zero metrics for GpuCustomShuffleReader| +|[#644](https://github.com/NVIDIA/spark-rapids/pull/644)|Small fix for race in catalog where a buffer could get spilled while …| +|[#623](https://github.com/NVIDIA/spark-rapids/pull/623)|Fix issues with canonicalization| +|[#599](https://github.com/NVIDIA/spark-rapids/pull/599)|[FEA] changelog generator| +|[#563](https://github.com/NVIDIA/spark-rapids/pull/563)|cudf and spark version info in artifacts| +|[#633](https://github.com/NVIDIA/spark-rapids/pull/633)|Fix leak if RebaseHelper throws during Parquet read| +|[#632](https://github.com/NVIDIA/spark-rapids/pull/632)|Copy function isSearchableType from Spark because signature changed in 3.0.1| +|[#583](https://github.com/NVIDIA/spark-rapids/pull/583)|Add udf compiler unit tests| +|[#617](https://github.com/NVIDIA/spark-rapids/pull/617)|Documentation updates for branch 0.2| +|[#616](https://github.com/NVIDIA/spark-rapids/pull/616)|Add config to reserve GPU memory| +|[#612](https://github.com/NVIDIA/spark-rapids/pull/612)|[REVIEW] Fix incorrect output from averages with filters in partial only mode| +|[#609](https://github.com/NVIDIA/spark-rapids/pull/609)|fix minor issues with instructions for building ucx| +|[#611](https://github.com/NVIDIA/spark-rapids/pull/611)|Added in profile to enable shims for SNAPSHOT releases| +|[#595](https://github.com/NVIDIA/spark-rapids/pull/595)|Parquet small file reading optimization| +|[#582](https://github.com/NVIDIA/spark-rapids/pull/582)|fix #579 Auto-merge between branches| +|[#536](https://github.com/NVIDIA/spark-rapids/pull/536)|Add test for skewed join optimization when AQE is enabled| +|[#603](https://github.com/NVIDIA/spark-rapids/pull/603)|Fix data size metric always 0 when using RAPIDS shuffle| +|[#600](https://github.com/NVIDIA/spark-rapids/pull/600)|Fix calculation of string data for compressed batches| +|[#597](https://github.com/NVIDIA/spark-rapids/pull/597)|Remove the xfail for parquet test_read_merge_schema on Databricks| +|[#591](https://github.com/NVIDIA/spark-rapids/pull/591)|Add ucx license in NOTICE-binary| +|[#596](https://github.com/NVIDIA/spark-rapids/pull/596)|Add Spark 3.0.2 to Shim layer| +|[#594](https://github.com/NVIDIA/spark-rapids/pull/594)|Filter nulls from joins where possible to improve performance.| +|[#590](https://github.com/NVIDIA/spark-rapids/pull/590)|Move GpuParquetScan/GpuOrcScan into Shim| +|[#588](https://github.com/NVIDIA/spark-rapids/pull/588)|xfail the tpch spark 3.1.0 tests that fail| +|[#572](https://github.com/NVIDIA/spark-rapids/pull/572)|Update buffer store to return compressed batches directly, add compression NVTX ranges| +|[#558](https://github.com/NVIDIA/spark-rapids/pull/558)|Fix unit tests when AQE is enabled| +|[#580](https://github.com/NVIDIA/spark-rapids/pull/580)|xfail the Spark 3.1.0 integration tests that fail | +|[#565](https://github.com/NVIDIA/spark-rapids/pull/565)|Minor improvements to TPC-DS benchmarking code| +|[#567](https://github.com/NVIDIA/spark-rapids/pull/567)|Explicitly disable AQE in one test| +|[#571](https://github.com/NVIDIA/spark-rapids/pull/571)|Fix Databricks shim layer for GpuFileSourceScanExec and GpuBroadcastExchangeExec| +|[#564](https://github.com/NVIDIA/spark-rapids/pull/564)|Add GPU decode time metric to scans| +|[#562](https://github.com/NVIDIA/spark-rapids/pull/562)|getCatalog can be 
called from the driver, and can return null| +|[#555](https://github.com/NVIDIA/spark-rapids/pull/555)|Fix build warnings for ColumnViewAccess| +|[#560](https://github.com/NVIDIA/spark-rapids/pull/560)|Fix databricks build for AQE support| +|[#557](https://github.com/NVIDIA/spark-rapids/pull/557)|Fix tests failing on Spark 3.1| +|[#547](https://github.com/NVIDIA/spark-rapids/pull/547)|Add GPU metrics to GpuFileSourceScanExec| +|[#462](https://github.com/NVIDIA/spark-rapids/pull/462)|Implement optimized AQE support so that exchanges run on GPU where possible| +|[#550](https://github.com/NVIDIA/spark-rapids/pull/550)|Document Parquet and ORC compression support| +|[#539](https://github.com/NVIDIA/spark-rapids/pull/539)|Update script to audit multiple Spark versions| +|[#543](https://github.com/NVIDIA/spark-rapids/pull/543)|Add metrics to GpuUnion operator| +|[#549](https://github.com/NVIDIA/spark-rapids/pull/549)|Move spark shim properties to top level pom| +|[#497](https://github.com/NVIDIA/spark-rapids/pull/497)|Add UDF compiler implementations| +|[#487](https://github.com/NVIDIA/spark-rapids/pull/487)|Add framework for batch compression of shuffle partitions| +|[#544](https://github.com/NVIDIA/spark-rapids/pull/544)|Add in driverExtraClassPath for standalone mode docs| +|[#546](https://github.com/NVIDIA/spark-rapids/pull/546)|Fix Spark 3.1.0 shim build error in GpuHashJoin| +|[#537](https://github.com/NVIDIA/spark-rapids/pull/537)|Use fresh SparkSession when capturing to avoid late capture of previous query| +|[#538](https://github.com/NVIDIA/spark-rapids/pull/538)|Revert "Temporary workaround for RMM initial pool size bug (#530)"| +|[#517](https://github.com/NVIDIA/spark-rapids/pull/517)|Add config to limit maximum RMM pool size| +|[#527](https://github.com/NVIDIA/spark-rapids/pull/527)|Add support for split and getArrayIndex| +|[#534](https://github.com/NVIDIA/spark-rapids/pull/534)|Fixes bugs around GpuShuffleEnv initialization| +|[#529](https://github.com/NVIDIA/spark-rapids/pull/529)|[BUG] Degenerate table metas were not getting copied to the heap| +|[#530](https://github.com/NVIDIA/spark-rapids/pull/530)|Temporary workaround for RMM initial pool size bug| +|[#526](https://github.com/NVIDIA/spark-rapids/pull/526)|Fix bug with nullability reporting in GpuFilterExec| +|[#521](https://github.com/NVIDIA/spark-rapids/pull/521)|Fix typo with databricks shim classname SparkShimServiceProvider| +|[#522](https://github.com/NVIDIA/spark-rapids/pull/522)|Use SQLConf instead of SparkConf when looking up SQL configs| +|[#518](https://github.com/NVIDIA/spark-rapids/pull/518)|Fix init order issue in GpuShuffleEnv when RAPIDS shuffle configured| +|[#514](https://github.com/NVIDIA/spark-rapids/pull/514)|Added clarification of RegExpReplace, DateDiff, made descriptive text consistent| +|[#506](https://github.com/NVIDIA/spark-rapids/pull/506)|Add in basic support for running tpcds like queries| +|[#504](https://github.com/NVIDIA/spark-rapids/pull/504)|Add ability to ignore tests depending on spark shim version| +|[#503](https://github.com/NVIDIA/spark-rapids/pull/503)|Remove unused async buffer spill support| +|[#501](https://github.com/NVIDIA/spark-rapids/pull/501)|disable codegen in 3.1 shim for hash join| +|[#466](https://github.com/NVIDIA/spark-rapids/pull/466)|Optimize and fix Api validation script| +|[#481](https://github.com/NVIDIA/spark-rapids/pull/481)|Codeowners| +|[#439](https://github.com/NVIDIA/spark-rapids/pull/439)|Check a PR has been committed using git signoff| 
+|[#319](https://github.com/NVIDIA/spark-rapids/pull/319)|Update partitioning logic in ShuffledBatchRDD| +|[#491](https://github.com/NVIDIA/spark-rapids/pull/491)|Temporarily ignore AQE integration tests| +|[#490](https://github.com/NVIDIA/spark-rapids/pull/490)|Fix Spark 3.1.0 build for HashJoin changes| +|[#482](https://github.com/NVIDIA/spark-rapids/pull/482)|Prevent bad practice in python tests| +|[#485](https://github.com/NVIDIA/spark-rapids/pull/485)|Show plan in assertion message if test fails| +|[#480](https://github.com/NVIDIA/spark-rapids/pull/480)|Fix link from README to getting-started.md| +|[#448](https://github.com/NVIDIA/spark-rapids/pull/448)|Preliminary support for keeping broadcast exchanges on GPU when AQE is enabled| +|[#478](https://github.com/NVIDIA/spark-rapids/pull/478)|Fall back to CPU for binary as string in parquet| +|[#477](https://github.com/NVIDIA/spark-rapids/pull/477)|Fix special case joins in broadcast nested loop join| +|[#469](https://github.com/NVIDIA/spark-rapids/pull/469)|Update HashAggregateSuite to work with AQE| +|[#475](https://github.com/NVIDIA/spark-rapids/pull/475)|Udf compiler pom followup| +|[#434](https://github.com/NVIDIA/spark-rapids/pull/434)|Add UDF compiler skeleton| +|[#474](https://github.com/NVIDIA/spark-rapids/pull/474)|Re-enable noscaladoc check| +|[#461](https://github.com/NVIDIA/spark-rapids/pull/461)|Fix comments style to pass scala style check| +|[#468](https://github.com/NVIDIA/spark-rapids/pull/468)|fix broken link| +|[#456](https://github.com/NVIDIA/spark-rapids/pull/456)|Add closeOnExcept to clean up code that closes resources only on exceptions| +|[#464](https://github.com/NVIDIA/spark-rapids/pull/464)|Turn off noscaladoc rule until codebase is fixed| +|[#449](https://github.com/NVIDIA/spark-rapids/pull/449)|Enforce NoScalaDoc rule in scalastyle checks| +|[#450](https://github.com/NVIDIA/spark-rapids/pull/450)|Enable scalastyle for shuffle plugin| +|[#451](https://github.com/NVIDIA/spark-rapids/pull/451)|Databricks remove unneeded files and fix build to not fail on rm when file missing| +|[#442](https://github.com/NVIDIA/spark-rapids/pull/442)|Shim layer support for Spark 3.0.0 Databricks| +|[#447](https://github.com/NVIDIA/spark-rapids/pull/447)|Add scalastyle plugin to shim module| +|[#426](https://github.com/NVIDIA/spark-rapids/pull/426)|Update BufferMeta to support multiple codec buffers per table| +|[#440](https://github.com/NVIDIA/spark-rapids/pull/440)|Run mortgage test both with AQE on and off| +|[#445](https://github.com/NVIDIA/spark-rapids/pull/445)|Added in StringRPad and StringLPad| +|[#422](https://github.com/NVIDIA/spark-rapids/pull/422)|Documentation updates| +|[#437](https://github.com/NVIDIA/spark-rapids/pull/437)|Fix bug with InSet and Strings| +|[#435](https://github.com/NVIDIA/spark-rapids/pull/435)|Add in checks for Parquet LEGACY date/time rebase| +|[#432](https://github.com/NVIDIA/spark-rapids/pull/432)|Fix batch use-after-close in partitioning, shuffle env init| +|[#423](https://github.com/NVIDIA/spark-rapids/pull/423)|Fix duplicates includes in assembly jar| +|[#418](https://github.com/NVIDIA/spark-rapids/pull/418)|CI Add unit tests running for Spark 3.0.1| +|[#421](https://github.com/NVIDIA/spark-rapids/pull/421)|Make it easier to run TPCxBB benchmarks from spark shell| +|[#413](https://github.com/NVIDIA/spark-rapids/pull/413)|Fix download link| +|[#414](https://github.com/NVIDIA/spark-rapids/pull/414)|Shim Layer to support multiple Spark versions | 
+|[#406](https://github.com/NVIDIA/spark-rapids/pull/406)|Update cast handling to deal with new libcudf casting limitations| +|[#405](https://github.com/NVIDIA/spark-rapids/pull/405)|Change slave->worker| +|[#395](https://github.com/NVIDIA/spark-rapids/pull/395)|Databricks doc updates| +|[#401](https://github.com/NVIDIA/spark-rapids/pull/401)|Extended the FAQ| +|[#398](https://github.com/NVIDIA/spark-rapids/pull/398)|Add tests for GpuPartition| +|[#352](https://github.com/NVIDIA/spark-rapids/pull/352)|Change spark tgz package name| +|[#397](https://github.com/NVIDIA/spark-rapids/pull/397)|Fix small bug in ShuffleBufferCatalog.hasActiveShuffle| +|[#286](https://github.com/NVIDIA/spark-rapids/pull/286)|[REVIEW] Updated join tests for cache| +|[#393](https://github.com/NVIDIA/spark-rapids/pull/393)|Contributor license agreement| +|[#389](https://github.com/NVIDIA/spark-rapids/pull/389)|Added in support for RangeExec| +|[#390](https://github.com/NVIDIA/spark-rapids/pull/390)|Ucx getting started| +|[#391](https://github.com/NVIDIA/spark-rapids/pull/391)|Hide slack channel in Jenkins scripts| +|[#387](https://github.com/NVIDIA/spark-rapids/pull/387)|Remove the term whitelist| +|[#365](https://github.com/NVIDIA/spark-rapids/pull/365)|[REVIEW] Timesub tests| +|[#383](https://github.com/NVIDIA/spark-rapids/pull/383)|Test utility to compare SQL query results between CPU and GPU| +|[#380](https://github.com/NVIDIA/spark-rapids/pull/380)|Fix databricks notebook link| +|[#378](https://github.com/NVIDIA/spark-rapids/pull/378)|Added in FAQ and fixed spelling| +|[#377](https://github.com/NVIDIA/spark-rapids/pull/377)|Update heading in configs.md| +|[#373](https://github.com/NVIDIA/spark-rapids/pull/373)|Modifying branch name to conform with rapidsai branch name change| +|[#376](https://github.com/NVIDIA/spark-rapids/pull/376)|Add our session extension correctly if there are other extensions configured| +|[#374](https://github.com/NVIDIA/spark-rapids/pull/374)|Fix rat issue for notebooks| +|[#364](https://github.com/NVIDIA/spark-rapids/pull/364)|Update Databricks patch for changes to GpuSortMergeJoin| +|[#371](https://github.com/NVIDIA/spark-rapids/pull/371)|fix typo and use regional bucket per GCP's update| +|[#359](https://github.com/NVIDIA/spark-rapids/pull/359)|Karthik changes| +|[#353](https://github.com/NVIDIA/spark-rapids/pull/353)|Fix broadcast nested loop join for the no column case| +|[#313](https://github.com/NVIDIA/spark-rapids/pull/313)|Additional tests for broadcast hash join| +|[#342](https://github.com/NVIDIA/spark-rapids/pull/342)|Implement build-side rules for shuffle hash join| +|[#349](https://github.com/NVIDIA/spark-rapids/pull/349)|Updated join code to treat null equality properly| +|[#335](https://github.com/NVIDIA/spark-rapids/pull/335)|Integration tests on spark 3.0.1-SNAPSHOT & 3.1.0-SNAPSHOT| +|[#346](https://github.com/NVIDIA/spark-rapids/pull/346)|Update the Title Header for Fine Tuning| +|[#344](https://github.com/NVIDIA/spark-rapids/pull/344)|Fix small typo in readme| +|[#331](https://github.com/NVIDIA/spark-rapids/pull/331)|Adds iterator and client unit tests, and prepares for more fetch failure handling| +|[#337](https://github.com/NVIDIA/spark-rapids/pull/337)|Fix Scala compile phase to allow Java classes referencing Scala classes| +|[#332](https://github.com/NVIDIA/spark-rapids/pull/332)|Match GPU overwritten functions with SQL functions from FunctionRegistry| +|[#339](https://github.com/NVIDIA/spark-rapids/pull/339)|Fix databricks build| 
+|[#338](https://github.com/NVIDIA/spark-rapids/pull/338)|Move GpuPartitioning to a separate file| +|[#310](https://github.com/NVIDIA/spark-rapids/pull/310)|Update release Jenkinsfile for Databricks| +|[#330](https://github.com/NVIDIA/spark-rapids/pull/330)|Hide private info in Jenkins scripts| +|[#324](https://github.com/NVIDIA/spark-rapids/pull/324)|Add in basic support for GpuCartesianProductExec| +|[#328](https://github.com/NVIDIA/spark-rapids/pull/328)|Enable slack notification for Databricks build| +|[#321](https://github.com/NVIDIA/spark-rapids/pull/321)|update databricks patch for GpuBroadcastNestedLoopJoinExec| +|[#322](https://github.com/NVIDIA/spark-rapids/pull/322)|Add oss.sonatype.org to download the cudf jar| +|[#320](https://github.com/NVIDIA/spark-rapids/pull/320)|Don't mount passwd/group to the container| +|[#258](https://github.com/NVIDIA/spark-rapids/pull/258)|Enable running TPCH tests with AQE enabled| +|[#318](https://github.com/NVIDIA/spark-rapids/pull/318)|Build docker image with Dockerfile| +|[#309](https://github.com/NVIDIA/spark-rapids/pull/309)|Update databricks patch to latest changes| +|[#312](https://github.com/NVIDIA/spark-rapids/pull/312)|Trigger branch-0.2 integration test| +|[#307](https://github.com/NVIDIA/spark-rapids/pull/307)|[Jenkins] Update the release script and Jenkinsfile| +|[#304](https://github.com/NVIDIA/spark-rapids/pull/304)|[DOC][Minor] Fix typo in spark config name.| +|[#303](https://github.com/NVIDIA/spark-rapids/pull/303)|Update compatibility doc for -0.0 issues| +|[#301](https://github.com/NVIDIA/spark-rapids/pull/301)|Add info about branches in README.md| +|[#296](https://github.com/NVIDIA/spark-rapids/pull/296)|Added in basic support for broadcast nested loop join| +|[#297](https://github.com/NVIDIA/spark-rapids/pull/297)|Databricks CI improvements and support runtime env parameter to xfail certain tests| +|[#292](https://github.com/NVIDIA/spark-rapids/pull/292)|Move artifacts version in version-def.sh| +|[#254](https://github.com/NVIDIA/spark-rapids/pull/254)|Cleanup QA tests| +|[#289](https://github.com/NVIDIA/spark-rapids/pull/289)|Clean up GpuCollectLimitMeta and add in metrics| +|[#287](https://github.com/NVIDIA/spark-rapids/pull/287)|Add in support for right join and fix issues build right| +|[#273](https://github.com/NVIDIA/spark-rapids/pull/273)|Added releases to the README.md| +|[#285](https://github.com/NVIDIA/spark-rapids/pull/285)|modify run_pyspark_from_build.sh to be bash 3 friendly| +|[#281](https://github.com/NVIDIA/spark-rapids/pull/281)|Add in support for Full Outer Join on non-null keys| +|[#274](https://github.com/NVIDIA/spark-rapids/pull/274)|Add RapidsDiskStore tests| +|[#259](https://github.com/NVIDIA/spark-rapids/pull/259)|Add RapidsHostMemoryStore tests| +|[#282](https://github.com/NVIDIA/spark-rapids/pull/282)|Update Databricks patch for 0.2 branch| +|[#261](https://github.com/NVIDIA/spark-rapids/pull/261)|Add conditional xfail test for DISTINCT aggregates with NaN| +|[#263](https://github.com/NVIDIA/spark-rapids/pull/263)|More time ops| +|[#256](https://github.com/NVIDIA/spark-rapids/pull/256)|Remove special cases for contains, startsWith, and endWith| +|[#253](https://github.com/NVIDIA/spark-rapids/pull/253)|Remove GpuAttributeReference and GpuSortOrder| +|[#271](https://github.com/NVIDIA/spark-rapids/pull/271)|Update the versions for 0.2.0 properly for the databricks build| +|[#162](https://github.com/NVIDIA/spark-rapids/pull/162)|Integration tests for corner cases in window functions.| 
+|[#264](https://github.com/NVIDIA/spark-rapids/pull/264)|Add a local mvn repo for nightly pipeline| +|[#262](https://github.com/NVIDIA/spark-rapids/pull/262)|Refer to branch-0.2| +|[#255](https://github.com/NVIDIA/spark-rapids/pull/255)|Revert change to make dependencies of shaded jar optional| +|[#257](https://github.com/NVIDIA/spark-rapids/pull/257)|Fix link to RAPIDS cudf in index.md| +|[#252](https://github.com/NVIDIA/spark-rapids/pull/252)|Update to 0.2.0-SNAPSHOT and cudf-0.15-SNAPSHOT| + +## Release 0.1 + +### Features +||| +|:---|:---| +|[#74](https://github.com/NVIDIA/spark-rapids/issues/74)|[FEA] Support ToUnixTimestamp| +|[#21](https://github.com/NVIDIA/spark-rapids/issues/21)|[FEA] NormalizeNansAndZeros| +|[#105](https://github.com/NVIDIA/spark-rapids/issues/105)|[FEA] integration tests for equi-joins| + +### Bugs Fixed +||| +|:---|:---| +|[#116](https://github.com/NVIDIA/spark-rapids/issues/116)|[BUG] calling replace with a NULL throws an exception| +|[#168](https://github.com/NVIDIA/spark-rapids/issues/168)|[BUG] GpuUnitTests Date tests leak column vectors| +|[#209](https://github.com/NVIDIA/spark-rapids/issues/209)|[BUG] Developers section in pom need to be updated| +|[#204](https://github.com/NVIDIA/spark-rapids/issues/204)|[BUG] Code coverage docs are out of date| +|[#154](https://github.com/NVIDIA/spark-rapids/issues/154)|[BUG] Incorrect output from partial-only averages with nulls| +|[#61](https://github.com/NVIDIA/spark-rapids/issues/61)|[BUG] Cannot disable Parquet, ORC, CSV reading when using FileSourceScanExec| + +### PRs +||| +|:---|:---| +|[#249](https://github.com/NVIDIA/spark-rapids/pull/249)|Compatability -> Compatibility| +|[#247](https://github.com/NVIDIA/spark-rapids/pull/247)|Add index.md for default doc page, fix table formatting for configs| +|[#241](https://github.com/NVIDIA/spark-rapids/pull/241)|Let default branch to master per the release rule| +|[#177](https://github.com/NVIDIA/spark-rapids/pull/177)|Fixed leaks in unit test and use ColumnarBatch for testing| +|[#243](https://github.com/NVIDIA/spark-rapids/pull/243)|Jenkins file for Databricks release| +|[#225](https://github.com/NVIDIA/spark-rapids/pull/225)|Make internal project dependencies optional for shaded artifact| +|[#242](https://github.com/NVIDIA/spark-rapids/pull/242)|Add site pages| +|[#221](https://github.com/NVIDIA/spark-rapids/pull/221)|Databricks Build Support| +|[#215](https://github.com/NVIDIA/spark-rapids/pull/215)|Remove CudfColumnVector| +|[#213](https://github.com/NVIDIA/spark-rapids/pull/213)|Add RapidsDeviceMemoryStore tests| +|[#214](https://github.com/NVIDIA/spark-rapids/pull/214)|[REVIEW] Test failure to pass Attribute as GpuAttribute| +|[#211](https://github.com/NVIDIA/spark-rapids/pull/211)|Add project leads to pom developer list| +|[#210](https://github.com/NVIDIA/spark-rapids/pull/210)|Updated coverage docs| +|[#195](https://github.com/NVIDIA/spark-rapids/pull/195)|Support public release for plugin jar| +|[#208](https://github.com/NVIDIA/spark-rapids/pull/208)|Remove unneeded comment from pom.xml| +|[#191](https://github.com/NVIDIA/spark-rapids/pull/191)|WindowExec handle different spark distributions| +|[#181](https://github.com/NVIDIA/spark-rapids/pull/181)|Remove INCOMPAT for NormalizeNanAndZero, KnownFloatingPointNormalized| +|[#196](https://github.com/NVIDIA/spark-rapids/pull/196)|Update Spark dependency to the released 3.0.0 artifacts| +|[#206](https://github.com/NVIDIA/spark-rapids/pull/206)|Change groupID to 'com.nvidia' in IT scripts| 
+|[#202](https://github.com/NVIDIA/spark-rapids/pull/202)|Fixed issue for contains when searching for an empty string| +|[#201](https://github.com/NVIDIA/spark-rapids/pull/201)|Fix name of scan| +|[#200](https://github.com/NVIDIA/spark-rapids/pull/200)|Fix issue with GpuAttributeReference not overrideing references| +|[#197](https://github.com/NVIDIA/spark-rapids/pull/197)|Fix metrics for writes| +|[#186](https://github.com/NVIDIA/spark-rapids/pull/186)|Fixed issue with nullability on concat| +|[#193](https://github.com/NVIDIA/spark-rapids/pull/193)|Add RapidsBufferCatalog tests| +|[#188](https://github.com/NVIDIA/spark-rapids/pull/188)|rebrand to com.nvidia instead of ai.rapids| +|[#189](https://github.com/NVIDIA/spark-rapids/pull/189)|Handle AggregateExpression having resultIds parameter instead of a single resultId| +|[#190](https://github.com/NVIDIA/spark-rapids/pull/190)|FileSourceScanExec can have logicalRelation parameter on some distributions| +|[#185](https://github.com/NVIDIA/spark-rapids/pull/185)|Update type of parameter of GpuExpandExec to make it consistent| +|[#172](https://github.com/NVIDIA/spark-rapids/pull/172)|Merge qa test to integration test| +|[#180](https://github.com/NVIDIA/spark-rapids/pull/180)|Add MetaUtils unit tests| +|[#171](https://github.com/NVIDIA/spark-rapids/pull/171)|Cleanup scaladoc warnings about missing links| +|[#176](https://github.com/NVIDIA/spark-rapids/pull/176)|Updated join tests to cover more data.| +|[#169](https://github.com/NVIDIA/spark-rapids/pull/169)|Remove dependency on shaded Spark artifact| +|[#174](https://github.com/NVIDIA/spark-rapids/pull/174)|Added in fallback tests| +|[#165](https://github.com/NVIDIA/spark-rapids/pull/165)|Move input metadata tests to pyspark| +|[#173](https://github.com/NVIDIA/spark-rapids/pull/173)|Fix setting local mode for tests| +|[#160](https://github.com/NVIDIA/spark-rapids/pull/160)|Integration tests for normalizing NaN/zeroes.| +|[#163](https://github.com/NVIDIA/spark-rapids/pull/163)|Ignore the order locally for repartition tests| +|[#157](https://github.com/NVIDIA/spark-rapids/pull/157)|Add partial and final only hash aggregate tests and fix nulls corner case for Average| +|[#159](https://github.com/NVIDIA/spark-rapids/pull/159)|Add integration tests for joins| +|[#158](https://github.com/NVIDIA/spark-rapids/pull/158)|Orc merge schema fallback and FileScan format configs| +|[#164](https://github.com/NVIDIA/spark-rapids/pull/164)|Fix compiler warnings| +|[#152](https://github.com/NVIDIA/spark-rapids/pull/152)|Moved cudf to 0.14 for CI| +|[#151](https://github.com/NVIDIA/spark-rapids/pull/151)|Switch CICD pipelines to Github| + +## Older Releases +Changelog of older releases can be found at [docs/archives](/docs/archives) diff --git a/docs/configs.md b/docs/configs.md index d79514badf6..6fd15ac25b7 100644 --- a/docs/configs.md +++ b/docs/configs.md @@ -164,6 +164,7 @@ Name | SQL Function(s) | Description | Default Value | Notes spark.rapids.sql.expression.Atanh|`atanh`|Inverse hyperbolic tangent|true|None| spark.rapids.sql.expression.AttributeReference| |References an input column|true|None| spark.rapids.sql.expression.BRound|`bround`|Round an expression to d decimal places using HALF_EVEN rounding mode|true|None| +spark.rapids.sql.expression.BitLength|`bit_length`|The bit length of string data|true|None| spark.rapids.sql.expression.BitwiseAnd|`&`|Returns the bitwise AND of the operands|true|None| spark.rapids.sql.expression.BitwiseNot|`~`|Returns the bitwise NOT of the operands|true|None| 
spark.rapids.sql.expression.BitwiseOr|`\|`|Returns the bitwise OR of the operands|true|None| @@ -255,6 +256,7 @@ Name | SQL Function(s) | Description | Default Value | Notes spark.rapids.sql.expression.NaNvl|`nanvl`|Evaluates to `left` iff left is not NaN, `right` otherwise|true|None| spark.rapids.sql.expression.NamedLambdaVariable| |A parameter to a higher order SQL function|true|None| spark.rapids.sql.expression.Not|`!`, `not`|Boolean not operator|true|None| +spark.rapids.sql.expression.OctetLength|`octet_length`|The byte length of string data|true|None| spark.rapids.sql.expression.Or|`or`|Logical OR|true|None| spark.rapids.sql.expression.Pmod|`pmod`|Pmod|true|None| spark.rapids.sql.expression.PosExplode|`posexplode_outer`, `posexplode`|Given an input array produces a sequence of rows for each value in the array|true|None| diff --git a/docs/download.md b/docs/download.md index a16a774d18e..7cb561a5ae6 100644 --- a/docs/download.md +++ b/docs/download.md @@ -18,6 +18,69 @@ cuDF jar, that is either preinstalled in the Spark classpath on all nodes or sub that uses the RAPIDS Accelerator For Apache Spark. See the [getting-started guide](https://nvidia.github.io/spark-rapids/Getting-Started/) for more details. +## Release v22.02.0 +Hardware Requirements: + +The plugin is tested on the following architectures: + + GPU Models: NVIDIA V100, T4 and A2/A10/A30/A100 GPUs + +Software Requirements: + + OS: Ubuntu 18.04, Ubuntu 20.04 or CentOS 7, CentOS 8 + + CUDA & NVIDIA Drivers*: 11.x & v450.80.02+ + + Apache Spark 3.0.1, 3.0.2, 3.0.3, 3.1.1, 3.1.2, 3.2.0, 3.2.1, Cloudera CDP 7.1.6, 7.1.7, Databricks 7.3 ML LTS or 9.1 ML LTS Runtime and GCP Dataproc 2.0 + + Python 3.6+, Scala 2.12, Java 8 + +*Some hardware may have a minimum driver version greater than v450.80.02+. Check the GPU spec sheet +for your hardware's minimum driver version. + +### Download v22.02.0 +* Download the [RAPIDS + Accelerator for Apache Spark 22.02.0 jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.02.0/rapids-4-spark_2.12-22.02.0.jar) +* Download the [RAPIDS cuDF 22.02.0 jar](https://repo1.maven.org/maven2/ai/rapids/cudf/22.02.0/cudf-22.02.0-cuda11.jar) + +This package is built against CUDA 11.5 and has [CUDA forward +compatibility](https://docs.nvidia.com/deploy/cuda-compatibility/index.html) enabled. It is tested +on V100, T4, A2, A10, A30 and A100 GPUs with CUDA 11.0-11.5. For those using other types of GPUs which +do not have CUDA forward compatibility (for example, GeForce), CUDA 11.5 is required. Users will +need to ensure the minimum driver (450.80.02) and CUDA toolkit are installed on each Spark node. + +### Verify signature +* Download the [RAPIDS Accelerator for Apache Spark 22.02.0 jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.02.0/rapids-4-spark_2.12-22.02.0.jar) + and [RAPIDS Accelerator for Apache Spark 22.02.0 jars.asc](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.02.0/rapids-4-spark_2.12-22.02.0.jar.asc) +* Download the [PUB_KEY](https://keys.openpgp.org/search?q=sw-spark@nvidia.com). 
+* Import the public key: `gpg --import PUB_KEY` +* Verify the signature: `gpg --verify rapids-4-spark_2.12-22.02.0.jar.asc rapids-4-spark_2.12-22.02.0.jar` + +The output if signature verify: + + gpg: Good signature from "NVIDIA Spark (For the signature of spark-rapids release jars) " + +### Release Notes +New functionality and performance improvements for this release include: +* Parquet reader and writer support for decimal precision up to 38 digits (128-bits) +* Decimal 128-bits casting + * Casting of decimal 128-bits values in nested types + * Casting to String from decimal 128-bits + * Casting from String to decimal 128-bits +* MIG on YARN support +* GPU explain only mode for Spark 3.x and 2.x +* JSON reader support +* Sequence function support +* regexp_extract function support +* Min and max on single-level struct +* CreateMap updates and enable CreateMap by default +* Cast from array to string +* Add regular expression support to regexp_replace function +* Support for conditional joins using libcudf's mixed join feature + +For a detailed list of changes, please refer to the +[CHANGELOG](https://github.com/NVIDIA/spark-rapids/blob/main/CHANGELOG.md). + ## Release v21.12.0 Hardware Requirements: diff --git a/docs/supported_ops.md b/docs/supported_ops.md index 52d7fc28468..2a6535befa0 100644 --- a/docs/supported_ops.md +++ b/docs/supported_ops.md @@ -2745,6 +2745,79 @@ are limited. +BitLength +`bit_length` +The bit length of string data +None +project +input + + + + + + + + + +S + + +NS + + + + + + + +result + + + +S + + + + + + + + + + + + + + + + +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + BitwiseAnd `&` Returns the bitwise AND of the operands @@ -2877,32 +2950,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - BitwiseNot `~` Returns the bitwise NOT of the operands @@ -3125,6 +3172,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + BitwiseXor `^` Returns the bitwise XOR of the operands @@ -3257,32 +3330,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - CaseWhen `when` CASE WHEN expression @@ -3488,6 +3535,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + CheckOverflow CheckOverflow after arithmetic operations between DecimalType data @@ -3629,32 +3702,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - ConcatWs `concat_ws` Concatenates multiple input strings or array of strings into a single string using a given separator @@ -3860,6 +3907,32 @@ are limited. 
+Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Cosh `cosh` Hyperbolic cosine @@ -4040,32 +4113,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - CreateArray `array` Returns an array with the given elements @@ -4228,6 +4275,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + CurrentRow$ Special boundary for a window frame, indicating stopping at the current row @@ -4458,32 +4531,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - DateFormatClass `date_format` Converts timestamp to a value of string in the format specified by the date format @@ -4620,6 +4667,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + DayOfMonth `dayofmonth`, `day` Returns the day of the month from a date or timestamp @@ -4876,32 +4949,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - ElementAt `element_at` Returns element of array at given(1-based) index in value if column is array. Returns value for the given key in value if column is map @@ -5038,6 +5085,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + EqualNullSafe `<=>` Check if the values are equal including nulls <=> @@ -5238,32 +5311,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - Exp `exp` Euler's number e raised to a power @@ -5401,6 +5448,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Expm1 `expm1` Euler's number e raised to a power minus 1 @@ -5606,32 +5679,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - GetArrayItem Gets the field at `ordinal` in the Array @@ -5768,6 +5815,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + GetMapValue Gets Value from a Map based on a key @@ -6083,32 +6156,6 @@ are limited. 
-Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - GreaterThanOrEqual `>=` >= operator @@ -6241,6 +6288,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Greatest `greatest` Returns the greatest value of all parameters, skipping null values @@ -6492,32 +6565,6 @@ are limited. NS -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - In `in` IN operator @@ -6633,6 +6680,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + InitCap `initcap` Returns str with the first letter of each word in uppercase. All other letters are in lowercase @@ -6873,32 +6946,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - IsNotNull `isnotnull` Checks if a value is not null @@ -7040,6 +7087,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + KnownNotNull Tag an expression as known to not be null @@ -7244,32 +7317,6 @@ are limited. NS -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - LastDay `last_day` Returns the last day of the month which the date belongs to @@ -7406,6 +7453,32 @@ are limited. NS +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Least `least` Returns the least value of all parameters, skipping null values @@ -7632,32 +7705,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - LessThanOrEqual `<=` <= operator @@ -7790,6 +7837,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Like `like` Like @@ -8000,32 +8073,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - Log1p `log1p` Natural log 1 + expr @@ -8188,6 +8235,32 @@ are limited. 
+Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Lower `lower`, `lcase` String lowercase operator @@ -8376,32 +8449,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - MapValues `map_values` Returns an unordered array containing the values of the map @@ -8569,6 +8616,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Month `month` Returns the month from a date or timestamp @@ -8742,36 +8815,10 @@ are limited. - - - - - - -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT + + + + Murmur3Hash @@ -9005,6 +9052,79 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + +OctetLength +`octet_length` +The byte length of string data +None +project +input + + + + + + + + + +S + + +NS + + + + + + + +result + + + +S + + + + + + + + + + + + + + + + Or `or` Logical OR @@ -9137,32 +9257,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - Pmod `pmod` Pmod @@ -9410,6 +9504,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + PreciseTimestampConversion Expression used internally to convert the TimestampType to Long and back without losing precision, i.e. in microseconds. Used in time windowing @@ -9504,32 +9624,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - PythonUDF UDF run in an external python process. Does not actually run on the GPU, but the transfer of data to/from it can be accelerated @@ -9821,6 +9915,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Rand `random`, `rand` Generate a random column with i.i.d. uniformly distributed values in [0, 1) @@ -9868,32 +9988,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - Rank `rank` Window function that returns the rank value within the aggregation window @@ -10187,6 +10281,32 @@ are limited. 
+Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + ReplicateRows Given an input row replicates the row N times @@ -10234,32 +10354,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - Rint `rint` Rounds up a double value to the nearest double equal to an integer diff --git a/integration_tests/src/main/python/csv_test.py b/integration_tests/src/main/python/csv_test.py index 0aa1cb50b4d..9101ef56760 100644 --- a/integration_tests/src/main/python/csv_test.py +++ b/integration_tests/src/main/python/csv_test.py @@ -14,13 +14,13 @@ import pytest -from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_fallback_collect, assert_gpu_fallback_write +from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_fallback_collect, assert_gpu_fallback_write, assert_cpu_and_gpu_are_equal_collect_with_capture from conftest import get_non_gpu_allowed from datetime import datetime, timezone from data_gen import * from marks import * from pyspark.sql.types import * -from spark_session import with_cpu_session +from spark_session import with_cpu_session, is_before_spark_330 _acq_schema = StructType([ StructField('loan_id', LongType()), @@ -405,3 +405,23 @@ def test_csv_save_as_table_fallback(spark_tmp_path, spark_tmp_table_factory): lambda spark, path: spark.read.csv(path), data_path, 'DataWritingCommandExec') + +@pytest.mark.skipif(is_before_spark_330(), reason='Hidden file metadata columns are a new feature of Spark 330') +@allow_non_gpu(any = True) +@pytest.mark.parametrize('metadata_column', ["file_path", "file_name", "file_size", "file_modification_time"]) +def test_csv_scan_with_hidden_metadata_fallback(spark_tmp_path, metadata_column): + data_path = spark_tmp_path + "/hidden_metadata.csv" + with_cpu_session(lambda spark : spark.range(10) \ + .selectExpr("id") \ + .write \ + .mode("overwrite") \ + .csv(data_path)) + + def do_csv_scan(spark): + df = spark.read.csv(data_path).selectExpr("_c0", "_metadata.{}".format(metadata_column)) + return df + + assert_cpu_and_gpu_are_equal_collect_with_capture( + do_csv_scan, + exist_classes= "FileSourceScanExec", + non_exist_classes= "GpuBatchScanExec") \ No newline at end of file diff --git a/integration_tests/src/main/python/join_test.py b/integration_tests/src/main/python/join_test.py index 94289193485..e8414230b1b 100644 --- a/integration_tests/src/main/python/join_test.py +++ b/integration_tests/src/main/python/join_test.py @@ -15,7 +15,7 @@ import pytest from pyspark.sql.functions import broadcast from pyspark.sql.types import * -from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_fallback_collect +from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_fallback_collect, assert_cpu_and_gpu_are_equal_collect_with_capture from conftest import is_databricks_runtime, is_emr_runtime from data_gen import * from marks import ignore_order, allow_non_gpu, incompat, validate_execs_in_gpu_plan @@ -361,7 +361,7 @@ def do_join(spark): def test_right_broadcast_nested_loop_join_with_ast_condition(data_gen, join_type, batch_size): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) - # This test is impacted by https://github.com/NVIDIA/spark-rapids/issues/294 + # This test is impacted by 
https://github.com/NVIDIA/spark-rapids/issues/294 # if the sizes are large enough to have both 0.0 and -0.0 show up 500 and 250 # but these take a long time to verify so we run with smaller numbers by default # that do not expose the error @@ -651,7 +651,7 @@ def do_join(spark): if (cache_side == 'cache_left'): # Try to force the shuffle to be split between CPU and GPU for the join - # by default if the operation after the shuffle is not on the GPU then + # by default if the operation after the shuffle is not on the GPU then # don't do a GPU shuffle, so do something simple after the repartition # to make sure that the GPU shuffle is used. left = left.repartition('a').selectExpr('b + 1 as b', 'a').cache() @@ -659,7 +659,7 @@ def do_join(spark): else: #cache_right # Try to force the shuffle to be split between CPU and GPU for the join - # by default if the operation after the shuffle is not on the GPU then + # by default if the operation after the shuffle is not on the GPU then # don't do a GPU shuffle, so do something simple after the repartition # to make sure that the GPU shuffle is used. right = right.repartition('r_a').selectExpr('c + 1 as c', 'r_a').cache() @@ -785,3 +785,37 @@ def do_join(spark): return spark.sql("select a.* from {} a, {} b where a.name=b.name".format( resultdf_name, resultdf_name)) assert_gpu_and_cpu_are_equal_collect(do_join) + +# ExistenceJoin occurs in the context of existential subqueries (which is rewritten to SemiJoin) if +# there is an additional condition that may qualify left records even though they don't have +# join partner records from the right. +# +# Thus a query is rewritten roughly as a LeftOuter with an additional Boolean column "exists" added. +# which feeds into a filter "exists OR someOtherPredicate" +# If the condition is something like an AND, it makes the result a subset of a SemiJoin, and +# the optimizer won't use ExistenceJoin. +@ignore_order(local=True) +@pytest.mark.parametrize( + "allowFallback", [ + pytest.param('true', + marks=pytest.mark.allow_non_gpu('SortMergeJoinExec')), + pytest.param('false', + marks=pytest.mark.xfail(reason="https://github.com/NVIDIA/spark-rapids/issues/589")) + ], ids=idfn +) +def test_existence_join(allowFallback, spark_tmp_table_factory): + leftTable = spark_tmp_table_factory.get() + rightTable = spark_tmp_table_factory.get() + def do_join(spark): + # create non-overlapping ranges to have a mix of exists=true and exists=false + spark.createDataFrame([v] for v in range(2, 10)).createOrReplaceTempView(leftTable) + spark.createDataFrame([v] for v in range(0, 8)).createOrReplaceTempView(rightTable) + res = spark.sql(( + "select * " + "from {} as l " + "where l._1 < 0 " + " OR l._1 in (select * from {} as r)" + ).format(leftTable, rightTable)) + return res + assert_cpu_and_gpu_are_equal_collect_with_capture(do_join, r".+Join ExistenceJoin\(exists#[0-9]+\).+") + diff --git a/integration_tests/src/main/python/map_test.py b/integration_tests/src/main/python/map_test.py index 72aea6fbd2e..dae5c408cdd 100644 --- a/integration_tests/src/main/python/map_test.py +++ b/integration_tests/src/main/python/map_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,7 +17,7 @@ from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_and_cpu_error, assert_gpu_fallback_collect from data_gen import * from marks import incompat, allow_non_gpu -from spark_session import is_before_spark_311 +from spark_session import is_before_spark_311, is_before_spark_330 from pyspark.sql.types import * from pyspark.sql.types import IntegralType import pyspark.sql.functions as f @@ -145,12 +145,13 @@ def test_map_scalar_project(): @pytest.mark.skipif(is_before_spark_311(), reason="Only in Spark 3.1.1 + ANSI mode, map key throws on no such element") @pytest.mark.parametrize('data_gen', [simple_string_to_string_map_gen], ids=idfn) def test_simple_get_map_value_ansi_fail(data_gen): + message = "org.apache.spark.SparkNoSuchElementException" if not is_before_spark_330() else "java.util.NoSuchElementException" assert_gpu_and_cpu_error( lambda spark: unary_op_df(spark, data_gen).selectExpr( 'a["NOT_FOUND"]').collect(), conf={'spark.sql.ansi.enabled':True, 'spark.sql.legacy.allowNegativeScaleOfDecimal': True}, - error_message='java.util.NoSuchElementException') + error_message=message) @pytest.mark.skipif(not is_before_spark_311(), reason="For Spark before 3.1.1 + ANSI mode, null will be returned instead of an exception if key is not found") @pytest.mark.parametrize('data_gen', [simple_string_to_string_map_gen], ids=idfn) @@ -176,12 +177,13 @@ def test_simple_element_at_map(data_gen): @pytest.mark.skipif(is_before_spark_311(), reason="Only in Spark 3.1.1 + ANSI mode, map key throws on no such element") @pytest.mark.parametrize('data_gen', [simple_string_to_string_map_gen], ids=idfn) def test_map_element_at_ansi_fail(data_gen): + message = "org.apache.spark.SparkNoSuchElementException" if not is_before_spark_330() else "java.util.NoSuchElementException" assert_gpu_and_cpu_error( lambda spark: unary_op_df(spark, data_gen).selectExpr( 'element_at(a, "NOT_FOUND")').collect(), conf={'spark.sql.ansi.enabled':True, 'spark.sql.legacy.allowNegativeScaleOfDecimal': True}, - error_message='java.util.NoSuchElementException') + error_message=message) @pytest.mark.skipif(not is_before_spark_311(), reason="For Spark before 3.1.1 + ANSI mode, null will be returned instead of an exception if key is not found") @pytest.mark.parametrize('data_gen', [simple_string_to_string_map_gen], ids=idfn) diff --git a/integration_tests/src/main/python/orc_test.py b/integration_tests/src/main/python/orc_test.py index e644e32a180..ba5fca0711c 100644 --- a/integration_tests/src/main/python/orc_test.py +++ b/integration_tests/src/main/python/orc_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,11 +14,11 @@ import pytest -from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_fallback_collect +from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_fallback_collect, assert_cpu_and_gpu_are_equal_collect_with_capture from data_gen import * from marks import * from pyspark.sql.types import * -from spark_session import with_cpu_session +from spark_session import with_cpu_session, is_before_spark_330 from parquet_test import _nested_pruning_schemas pytestmark = pytest.mark.nightly_resource_consuming_test @@ -444,3 +444,24 @@ def test_read_with_more_columns(spark_tmp_path, orc_gen, reader_confs, v1_enable assert_gpu_and_cpu_are_equal_collect( lambda spark : spark.read.schema(rs).orc(data_path), conf=all_confs) + +@pytest.mark.skipif(is_before_spark_330(), reason='Hidden file metadata columns are a new feature of Spark 330') +@allow_non_gpu(any = True) +@pytest.mark.parametrize('metadata_column', ["file_path", "file_name", "file_size", "file_modification_time"]) +def test_orc_scan_with_hidden_metadata_fallback(spark_tmp_path, metadata_column): + data_path = spark_tmp_path + "/hidden_metadata.orc" + with_cpu_session(lambda spark : spark.range(10) \ + .selectExpr("id", "id % 3 as p") \ + .write \ + .partitionBy("p") \ + .mode("overwrite") \ + .orc(data_path)) + + def do_orc_scan(spark): + df = spark.read.orc(data_path).selectExpr("id", "_metadata.{}".format(metadata_column)) + return df + + assert_cpu_and_gpu_are_equal_collect_with_capture( + do_orc_scan, + exist_classes= "FileSourceScanExec", + non_exist_classes= "GpuBatchScanExec") \ No newline at end of file diff --git a/integration_tests/src/main/python/parquet_test.py b/integration_tests/src/main/python/parquet_test.py index 516cef3e7a5..7d1d9e1a6b3 100644 --- a/integration_tests/src/main/python/parquet_test.py +++ b/integration_tests/src/main/python/parquet_test.py @@ -18,6 +18,7 @@ from data_gen import * from marks import * from pyspark.sql.types import * +from pyspark.sql.functions import * from spark_session import with_cpu_session, with_gpu_session, is_before_spark_330 def read_parquet_df(data_path): @@ -728,3 +729,24 @@ def do_parquet_scan(spark): exist_classes= "BatchScanExec", non_exist_classes= "GpuBatchScanExec", conf = conf_for_parquet_aggregate_pushdown) + +@pytest.mark.skipif(is_before_spark_330(), reason='Hidden file metadata columns are a new feature of Spark 330') +@allow_non_gpu(any = True) +@pytest.mark.parametrize('metadata_column', ["file_path", "file_name", "file_size", "file_modification_time"]) +def test_parquet_scan_with_hidden_metadata_fallback(spark_tmp_path, metadata_column): + data_path = spark_tmp_path + "/hidden_metadata.parquet" + with_cpu_session(lambda spark : spark.range(10) \ + .selectExpr("id", "id % 3 as p") \ + .write \ + .partitionBy("p") \ + .mode("overwrite") \ + .parquet(data_path)) + + def do_parquet_scan(spark): + df = spark.read.parquet(data_path).selectExpr("id", "_metadata.{}".format(metadata_column)) + return df + + assert_cpu_and_gpu_are_equal_collect_with_capture( + do_parquet_scan, + exist_classes= "FileSourceScanExec", + non_exist_classes= "GpuBatchScanExec") \ No newline at end of file diff --git a/integration_tests/src/main/python/string_test.py b/integration_tests/src/main/python/string_test.py index 296a1bad26c..857ee4295c2 100644 --- a/integration_tests/src/main/python/string_test.py +++ b/integration_tests/src/main/python/string_test.py @@ -401,6 +401,12 @@ def test_length(): 'CHAR_LENGTH(a)', 'CHARACTER_LENGTH(a)')) +def test_byte_length(): + 
gen = mk_str_gen('.{0,5}TEST[\ud720 A]{0,5}') + assert_gpu_and_cpu_are_equal_collect( + lambda spark: unary_op_df(spark, gen).selectExpr( + 'BIT_LENGTH(a)', 'OCTET_LENGTH(a)')) + @incompat def test_initcap(): # Because we don't use the same unicode version we need to limit @@ -712,4 +718,4 @@ def test_rlike_fallback_possessive_quantifier(): lambda spark: unary_op_df(spark, gen).selectExpr( 'a rlike "a*+"'), 'RLike', - conf={'spark.rapids.sql.expression.RLike': 'true'}) \ No newline at end of file + conf={'spark.rapids.sql.expression.RLike': 'true'}) diff --git a/integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/mortgage/MortgageSparkSuite.scala b/integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/mortgage/MortgageSparkSuite.scala index a9263c2ed36..145905df727 100644 --- a/integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/mortgage/MortgageSparkSuite.scala +++ b/integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/mortgage/MortgageSparkSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,16 +17,21 @@ package com.nvidia.spark.rapids.tests.mortgage import com.nvidia.spark.rapids.ShimLoader -import org.scalatest.{BeforeAndAfterAll, FunSuite} +import org.scalatest.FunSuite import org.apache.spark.sql.SparkSession import org.apache.spark.sql.functions._ -class MortgageSparkSuite extends FunSuite with BeforeAndAfterAll { +class MortgageSparkSuite extends FunSuite { /** * This is intentionally a def rather than a val so that scalatest uses the correct value (from * this class or the derived class) when registering tests. + * + * @note You are likely to see device/host leaks from this test when using the + * RAPIDS Shuffle Manager. The reason for that is a race between cuDF's MemoryCleaner + * and the SparkContext shutdown. Because of this, shuffle buffers cached may not get + * cleaned (on shuffle unregister) when the MemoryCleaner exits. */ def adaptiveQueryEnabled = false @@ -63,11 +68,6 @@ class MortgageSparkSuite extends FunSuite with BeforeAndAfterAll { builder.getOrCreate() } - // Close the session to avoid hanging after all cases are completed - override def afterAll() = { - session.close() - } - test("extract mortgage data") { val df = Run.csv( session, diff --git a/jenkins/databricks/build.sh b/jenkins/databricks/build.sh index 261dd6bec0a..ef8521aea95 100755 --- a/jenkins/databricks/build.sh +++ b/jenkins/databricks/build.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -87,11 +87,19 @@ then PARQUETHADOOPJAR=----workspace_${SPARK_MAJOR_VERSION_STRING}--maven-trees--hive-2.3__hadoop-2.7--org.apache.parquet--parquet-hadoop--org.apache.parquet__parquet-hadoop__1.10.1-databricks9.jar PARQUETCOMMONJAR=----workspace_${SPARK_MAJOR_VERSION_STRING}--maven-trees--hive-2.3__hadoop-2.7--org.apache.parquet--parquet-common--org.apache.parquet__parquet-common__1.10.1-databricks9.jar PARQUETCOLUMNJAR=----workspace_${SPARK_MAJOR_VERSION_STRING}--maven-trees--hive-2.3__hadoop-2.7--org.apache.parquet--parquet-column--org.apache.parquet__parquet-column__1.10.1-databricks9.jar + ORC_CORE_JAR=----workspace_${SPARK_MAJOR_VERSION_STRING}--maven-trees--hive-2.3__hadoop-2.7--org.apache.orc--orc-core--org.apache.orc__orc-core__1.5.12.jar + ORC_SHIM_JAR=----workspace_${SPARK_MAJOR_VERSION_STRING}--maven-trees--hive-2.3__hadoop-2.7--org.apache.orc--orc-shims--org.apache.orc__orc-shims__1.5.12.jar + ORC_MAPREDUCE_JAR=----workspace_${SPARK_MAJOR_VERSION_STRING}--maven-trees--hive-2.3__hadoop-2.7--org.apache.orc--orc-mapreduce--org.apache.orc__orc-mapreduce__1.5.12.jar else PARQUETHADOOPJAR=----workspace_${SPARK_MAJOR_VERSION_STRING}--maven-trees--hive-2.3__hadoop-2.7--org.apache.parquet--parquet-hadoop--org.apache.parquet__parquet-hadoop__1.10.1-databricks6.jar PARQUETCOMMONJAR=----workspace_${SPARK_MAJOR_VERSION_STRING}--maven-trees--hive-2.3__hadoop-2.7--org.apache.parquet--parquet-common--org.apache.parquet__parquet-common__1.10.1-databricks6.jar PARQUETCOLUMNJAR=----workspace_${SPARK_MAJOR_VERSION_STRING}--maven-trees--hive-2.3__hadoop-2.7--org.apache.parquet--parquet-column--org.apache.parquet__parquet-column__1.10.1-databricks6.jar + ORC_CORE_JAR=----workspace_${SPARK_MAJOR_VERSION_STRING}--maven-trees--hive-2.3__hadoop-2.7--org.apache.orc--orc-core--org.apache.orc__orc-core__1.5.10.jar + ORC_SHIM_JAR=----workspace_${SPARK_MAJOR_VERSION_STRING}--maven-trees--hive-2.3__hadoop-2.7--org.apache.orc--orc-shims--org.apache.orc__orc-shims__1.5.10.jar + ORC_MAPREDUCE_JAR=----workspace_${SPARK_MAJOR_VERSION_STRING}--maven-trees--hive-2.3__hadoop-2.7--org.apache.orc--orc-mapreduce--org.apache.orc__orc-mapreduce__1.5.10.jar fi + +PROTOBUF_JAR=----workspace_${SPARK_MAJOR_VERSION_STRING}--maven-trees--hive-2.3__hadoop-2.7--com.google.protobuf--protobuf-java--com.google.protobuf__protobuf-java__2.6.1.jar PARQUETFORMATJAR=----workspace_${SPARK_MAJOR_VERSION_STRING}--maven-trees--hive-2.3__hadoop-2.7--org.apache.parquet--parquet-format--org.apache.parquet__parquet-format__2.4.0.jar NETWORKCOMMON=----workspace_${SPARK_MAJOR_VERSION_STRING}--common--network-common--network-common-hive-2.3__hadoop-2.7_2.12_deploy.jar @@ -363,6 +371,38 @@ mvn -B install:install-file \ -Dversion=$SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS \ -Dpackaging=jar +mvn -B install:install-file \ + -Dmaven.repo.local=$M2DIR \ + -Dfile=$JARDIR/$ORC_CORE_JAR \ + -DgroupId=org.apache.orc \ + -DartifactId=orc-core \ + -Dversion=$SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS \ + -Dpackaging=jar + +mvn -B install:install-file \ + -Dmaven.repo.local=$M2DIR \ + -Dfile=$JARDIR/$ORC_SHIM_JAR \ + -DgroupId=org.apache.orc \ + -DartifactId=orc-shims \ + -Dversion=$SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS \ + -Dpackaging=jar + +mvn -B install:install-file \ + -Dmaven.repo.local=$M2DIR \ + -Dfile=$JARDIR/$ORC_MAPREDUCE_JAR \ + -DgroupId=org.apache.orc \ + -DartifactId=orc-mapreduce \ + -Dversion=$SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS \ + -Dpackaging=jar + +mvn -B install:install-file \ + -Dmaven.repo.local=$M2DIR \ + 
-Dfile=$JARDIR/$PROTOBUF_JAR \ + -DgroupId=com.google.protobuf \ + -DartifactId=protobuf-java \ + -Dversion=$SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS \ + -Dpackaging=jar + mvn -B -Ddatabricks -Dbuildver=$BUILDVER clean package -DskipTests cd /home/ubuntu diff --git a/jenkins/spark-tests.sh b/jenkins/spark-tests.sh index 10611c8e3f3..a78838edcde 100755 --- a/jenkins/spark-tests.sh +++ b/jenkins/spark-tests.sh @@ -258,8 +258,8 @@ if [[ $TEST_MODE == "ALL" || $TEST_MODE == "IT_ONLY" ]]; then PARALLELISM=$(nvidia-smi --query-gpu=memory.free --format=csv,noheader | \ awk '{if (MAX < $1){ MAX = $1}} END {print int(MAX / (2 * 1024))}') fi - # parallelism > 8 could slow down the whole process, so we have a limitation for it - [[ ${PARALLELISM} -gt 8 ]] && PARALLELISM=8 + # parallelism > 7 could slow down the whole process, so we have a limitation for it + [[ ${PARALLELISM} -gt 7 ]] && PARALLELISM=7 MEMORY_FRACTION=$(python -c "print(1/($PARALLELISM + 0.1))") export MEMORY_FRACTION_CONF="--conf spark.rapids.memory.gpu.allocFraction=${MEMORY_FRACTION} \ --conf spark.rapids.memory.gpu.maxAllocFraction=${MEMORY_FRACTION}" diff --git a/pom.xml b/pom.xml index bc79b7eb526..23dbfa761bb 100644 --- a/pom.xml +++ b/pom.xml @@ -118,6 +118,7 @@ ${project.basedir}/src/main/301until310-all/scala ${project.basedir}/src/main/301until310-nondb/scala ${project.basedir}/src/main/301until320-all/scala + ${project.basedir}/src/main/301until320-noncdh/scala ${project.basedir}/src/main/301until320-nondb/scala ${project.basedir}/src/main/301until330-all/scala ${project.basedir}/src/main/pre320-treenode/scala @@ -129,6 +130,7 @@ + common dist integration_tests shuffle-plugin @@ -166,6 +168,7 @@ ${project.basedir}/src/main/301until310-all/scala ${project.basedir}/src/main/301until310-nondb/scala ${project.basedir}/src/main/301until320-all/scala + ${project.basedir}/src/main/301until320-noncdh/scala ${project.basedir}/src/main/301until320-nondb/scala ${project.basedir}/src/main/301until330-all/scala ${project.basedir}/src/main/pre320-treenode/scala @@ -182,6 +185,7 @@ spark302 + common dist integration_tests shuffle-plugin @@ -223,6 +227,7 @@ ${project.basedir}/src/main/301until310-all/scala ${project.basedir}/src/main/301until310-nondb/scala ${project.basedir}/src/main/301until320-all/scala + ${project.basedir}/src/main/301until320-noncdh/scala ${project.basedir}/src/main/301until320-nondb/scala ${project.basedir}/src/main/301until330-all/scala ${project.basedir}/src/main/pre320-treenode/scala @@ -234,6 +239,7 @@ + common dist integration_tests shuffle-plugin @@ -275,6 +281,7 @@ ${project.basedir}/src/main/301until310-all/scala ${project.basedir}/src/main/301until310-nondb/scala ${project.basedir}/src/main/301until320-all/scala + ${project.basedir}/src/main/301until320-noncdh/scala ${project.basedir}/src/main/301until320-nondb/scala ${project.basedir}/src/main/301until330-all/scala ${project.basedir}/src/main/pre320-treenode/scala @@ -286,6 +293,7 @@ + common dist integration_tests shuffle-plugin @@ -325,6 +333,7 @@ ${project.basedir}/src/main/301+-nondb/scala ${project.basedir}/src/main/311/scala ${project.basedir}/src/main/301until320-all/scala + ${project.basedir}/src/main/301until320-noncdh/scala ${project.basedir}/src/main/301until320-nondb/scala ${project.basedir}/src/main/301until330-all/scala ${project.basedir}/src/main/311+-all/scala @@ -342,6 +351,7 @@ + common dist integration_tests shuffle-plugin @@ -378,8 +388,6 @@ ${spark301db.version} ${spark301db.version} true - ${spark301db.version} - ${spark301db.version} 
@@ -396,6 +404,7 @@ ${project.basedir}/src/main/301db/scala ${project.basedir}/src/main/301until310-all/scala ${project.basedir}/src/main/301until320-all/scala + ${project.basedir}/src/main/301until320-noncdh/scala ${project.basedir}/src/main/301until330-all/scala ${project.basedir}/src/main/pre320-treenode/scala @@ -406,6 +415,7 @@ + common dist integration_tests shuffle-plugin @@ -439,8 +449,6 @@ ${spark312db.version} ${spark312db.version} true - ${spark312db.version} - ${spark312db.version} @@ -455,6 +463,7 @@ ${project.basedir}/src/main/301until320-all/scala + ${project.basedir}/src/main/301until320-noncdh/scala ${project.basedir}/src/main/312db/scala ${project.basedir}/src/main/301until330-all/scala ${project.basedir}/src/main/311until320-all/scala @@ -471,6 +480,7 @@ + common dist integration_tests shuffle-plugin @@ -508,6 +518,7 @@ ${project.basedir}/src/main/301+-nondb/scala ${project.basedir}/src/main/312/scala ${project.basedir}/src/main/301until320-all/scala + ${project.basedir}/src/main/301until320-noncdh/scala ${project.basedir}/src/main/301until320-nondb/scala ${project.basedir}/src/main/301until330-all/scala ${project.basedir}/src/main/311+-all/scala @@ -525,6 +536,7 @@ + common dist integration_tests shuffle-plugin @@ -565,6 +577,7 @@ ${project.basedir}/src/main/301+-nondb/scala ${project.basedir}/src/main/313/scala ${project.basedir}/src/main/301until320-all/scala + ${project.basedir}/src/main/301until320-noncdh/scala ${project.basedir}/src/main/301until320-nondb/scala ${project.basedir}/src/main/301until330-all/scala ${project.basedir}/src/main/311+-all/scala @@ -582,6 +595,7 @@ + common dist integration_tests shuffle-plugin @@ -646,6 +660,7 @@ + common dist integration_tests shuffle-plugin @@ -709,6 +724,7 @@ + common dist integration_tests shuffle-plugin @@ -772,6 +788,7 @@ + common dist integration_tests shuffle-plugin @@ -834,6 +851,7 @@ + common dist integration_tests shuffle-plugin @@ -895,6 +913,7 @@ + common dist integration_tests shuffle-plugin @@ -963,8 +982,6 @@ 22.04.0-SNAPSHOT 2.12 2.12.15 - 1.5.10 - org.rogach diff --git a/scripts/generate-changelog b/scripts/generate-changelog index 0c313f6ec5a..d7459684a52 100755 --- a/scripts/generate-changelog +++ b/scripts/generate-changelog @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
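The jenkins/spark-tests.sh change above lowers the parallelism cap to 7 while keeping the rest of the sizing math: parallelism is derived from the largest free-memory reading across GPUs, and each concurrent session gets a GPU allocation fraction of 1 / (parallelism + 0.1). A small sketch of that arithmetic, assuming free memory is reported in MiB as nvidia-smi does; the floor of one session is an added assumption, not part of the script.

// Sketch of the integration-test sizing math from jenkins/spark-tests.sh above:
// roughly 2 GiB per concurrent session, parallelism capped at 7, and
// 1 / (parallelism + 0.1) used for spark.rapids.memory.gpu.allocFraction and
// spark.rapids.memory.gpu.maxAllocFraction.
object ItSessionPlanner {
  def plan(freeMemMiBPerGpu: Seq[Long]): (Int, Double) = {
    val slots = (freeMemMiBPerGpu.max / (2 * 1024)).toInt   // 2 GiB per session
    val parallelism = math.max(1, math.min(slots, 7))       // cap to avoid slowdown
    val allocFraction = 1.0 / (parallelism + 0.1)           // leaves a little headroom
    (parallelism, allocFraction)
  }
}

// Example: a card with ~15500 MiB free yields 7 sessions at roughly 0.14 each:
// ItSessionPlanner.plan(Seq(15500L))  // (7, 0.1408...)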
@@ -46,11 +46,11 @@ Usage: # generate changelog for releases 0.1 to 21.12 scripts/generate-changelog --token= \ - --releases=0.1,0.2,0.3,0.4,0.4.1,0.5,21.06,21.06.1,21.06.2,21.08,21.08.1,21.10,21.12 + --releases=21.06,21.06.1,21.06.2,21.08,21.08.1,21.10,21.12,22.02 # generate changelog for releases 0.1 to 21.12 to /tmp/CHANGELOG.md GITHUB_TOKEN= scripts/generate-changelog \ - --releases=0.1,0.2,0.3,0.4,0.4.1,0.5,21.06,21.06.1,21.06.2,21.08,21.08.1,21.10,21.12 \ + --releases=21.06,21.06.1,21.06.2,21.08,21.08.1,21.10,21.12,22.02 \ --path=/tmp/CHANGELOG.md """ import os @@ -273,6 +273,8 @@ def form_changelog(path: str, changelog: dict): subsections += form_subsection(issues, PRS) markdown = f"""# Change log Generated on {date.today()}{subsections} +\n## Older Releases +Changelog of older releases can be found at [docs/archives](/docs/archives) """ with open(path, "w") as file: file.write(markdown) diff --git a/shuffle-plugin/pom.xml b/shuffle-plugin/pom.xml index ed1654161ba..410106f07ac 100644 --- a/shuffle-plugin/pom.xml +++ b/shuffle-plugin/pom.xml @@ -47,6 +47,11 @@ 1.11 compile + + com.nvidia + rapids-4-spark-common_${scala.binary.version} + ${project.version} + com.nvidia rapids-4-spark-sql_${scala.binary.version} diff --git a/shuffle-plugin/src/main/scala/com/nvidia/spark/rapids/shuffle/ucx/UCX.scala b/shuffle-plugin/src/main/scala/com/nvidia/spark/rapids/shuffle/ucx/UCX.scala index 177eeef1a5c..b9557fb5f63 100644 --- a/shuffle-plugin/src/main/scala/com/nvidia/spark/rapids/shuffle/ucx/UCX.scala +++ b/shuffle-plugin/src/main/scala/com/nvidia/spark/rapids/shuffle/ucx/UCX.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,9 +25,9 @@ import java.util.concurrent.atomic.AtomicLong import scala.collection.mutable.ArrayBuffer import ai.rapids.cudf.{BaseDeviceMemoryBuffer, MemoryBuffer, NvtxColor, NvtxRange} -import com.google.common.util.concurrent.ThreadFactoryBuilder import com.nvidia.spark.rapids.{Arm, GpuDeviceManager, RapidsConf} import com.nvidia.spark.rapids.RapidsPluginImplicits._ +import com.nvidia.spark.rapids.ThreadFactoryBuilder import com.nvidia.spark.rapids.shuffle.{ClientConnection, MemoryRegistrationCallback, MessageType, MetadataTransportBuffer, TransportBuffer, TransportUtils} import org.openucx.jucx._ import org.openucx.jucx.ucp._ diff --git a/shuffle-plugin/src/main/scala/com/nvidia/spark/rapids/shuffle/ucx/UCXShuffleTransport.scala b/shuffle-plugin/src/main/scala/com/nvidia/spark/rapids/shuffle/ucx/UCXShuffleTransport.scala index 942a519b5cf..d7b22affda7 100644 --- a/shuffle-plugin/src/main/scala/com/nvidia/spark/rapids/shuffle/ucx/UCXShuffleTransport.scala +++ b/shuffle-plugin/src/main/scala/com/nvidia/spark/rapids/shuffle/ucx/UCXShuffleTransport.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,8 +23,8 @@ import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import ai.rapids.cudf.{BaseDeviceMemoryBuffer, CudaMemoryBuffer, DeviceMemoryBuffer, HostMemoryBuffer, MemoryBuffer} -import com.google.common.util.concurrent.ThreadFactoryBuilder import com.nvidia.spark.rapids.{GpuDeviceManager, HashedPriorityQueue, RapidsConf} +import com.nvidia.spark.rapids.ThreadFactoryBuilder import com.nvidia.spark.rapids.shuffle._ import com.nvidia.spark.rapids.shuffle.{BounceBufferManager, BufferReceiveState, ClientConnection, PendingTransferRequest, RapidsShuffleClient, RapidsShuffleRequestHandler, RapidsShuffleServer, RapidsShuffleTransport, RefCountedDirectByteBuffer} diff --git a/sql-plugin/pom.xml b/sql-plugin/pom.xml index 33417cc2c81..35766938d06 100644 --- a/sql-plugin/pom.xml +++ b/sql-plugin/pom.xml @@ -36,9 +36,9 @@ ${cuda.version} - com.google.flatbuffers - flatbuffers-java - compile + com.nvidia + rapids-4-spark-common_${scala.binary.version} + ${project.version} org.scala-lang @@ -49,41 +49,12 @@ scalatest_${scala.binary.version} test + + + - org.apache.orc - orc-core - ${orc.classifier} - - - org.slf4j - slf4j-api - - - - - org.apache.orc - orc-mapreduce - ${orc.classifier} - - - com.google.code.findbugs - jsr305 - - - - - org.apache.hive - hive-storage-api - - - org.slf4j - slf4j-api - - - - - com.google.protobuf - protobuf-java + com.google.flatbuffers + flatbuffers-java @@ -290,6 +261,36 @@ ${spark.version} provided + + org.apache.orc + orc-core + ${spark.version} + provided + + + org.apache.orc + orc-shims + ${spark.version} + provided + + + org.apache.orc + orc-mapreduce + ${spark.version} + provided + + + org.apache.hive + hive-storage-api + ${spark.version} + provided + + + com.google.protobuf + protobuf-java + ${spark.version} + provided + diff --git a/sql-plugin/src/main/301until320-all/scala/com/nvidia/spark/rapids/shims/v2/OrcShims301until320Base.scala b/sql-plugin/src/main/301until320-all/scala/com/nvidia/spark/rapids/shims/v2/OrcShims301until320Base.scala new file mode 100644 index 00000000000..543cb7c9afd --- /dev/null +++ b/sql-plugin/src/main/301until320-all/scala/com/nvidia/spark/rapids/shims/v2/OrcShims301until320Base.scala @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.nvidia.spark.rapids.shims.v2 + +import scala.collection.mutable.ArrayBuffer + +import com.nvidia.spark.rapids.OrcOutputStripe +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.hive.common.io.DiskRangeList +import org.apache.orc.{CompressionCodec, CompressionKind, DataReader, OrcFile, OrcProto, PhysicalWriter, Reader, StripeInformation} +import org.apache.orc.impl.{DataReaderProperties, OutStream, SchemaEvolution} +import org.apache.orc.impl.RecordReaderImpl.SargApplier + +trait OrcShims301until320Base { + + // read data to buffer + def readFileData(dataReader: DataReader, inputDataRanges: DiskRangeList): DiskRangeList = { + dataReader.readFileData(inputDataRanges, 0, false) + } + + // create reader properties builder + def newDataReaderPropertiesBuilder(compressionSize: Int, + compressionKind: CompressionKind, typeCount: Int): DataReaderProperties.Builder = { + DataReaderProperties.builder() + .withBufferSize(compressionSize) + .withCompression(compressionKind) + .withTypeCount(typeCount) + } + + // create ORC out stream + def newOrcOutStream(name: String, bufferSize: Int, codec: CompressionCodec, + receiver: PhysicalWriter.OutputReceiver): OutStream = { + new OutStream(name, bufferSize, codec, receiver) + } + + // filter stripes by pushing down filter + def filterStripes( + stripes: Seq[StripeInformation], + conf: Configuration, + orcReader: Reader, + dataReader: DataReader, + gen: (StripeInformation, OrcProto.StripeFooter, Array[Int], Array[Int]) => OrcOutputStripe, + evolution: SchemaEvolution, + sargApp: SargApplier, + sargColumns: Array[Boolean], + ignoreNonUtf8BloomFilter: Boolean, + writerVersion: OrcFile.WriterVersion, + fileIncluded: Array[Boolean], + columnMapping: Array[Int], + idMapping: Array[Int]): ArrayBuffer[OrcOutputStripe] = { + val result = new ArrayBuffer[OrcOutputStripe](stripes.length) + stripes.foreach { stripe => + val stripeFooter = dataReader.readStripeFooter(stripe) + val needStripe = if (sargApp != null) { + // An ORC schema is a single struct type describing the schema fields + val orcFileSchema = evolution.getFileType(0) + val orcIndex = dataReader.readRowIndex(stripe, orcFileSchema, stripeFooter, + ignoreNonUtf8BloomFilter, fileIncluded, null, sargColumns, + writerVersion, null, null) + val rowGroups = sargApp.pickRowGroups(stripe, orcIndex.getRowGroupIndex, + orcIndex.getBloomFilterKinds, stripeFooter.getColumnsList, orcIndex.getBloomFilterIndex, + true) + rowGroups != SargApplier.READ_NO_RGS + } else { + true + } + + if (needStripe) { + result.append(gen(stripe, stripeFooter, columnMapping, idMapping)) + } + } + result + } +} diff --git a/sql-plugin/src/main/301until320-noncdh/scala/com/nvidia/spark/rapids/shims/v2/OrcShims.scala b/sql-plugin/src/main/301until320-noncdh/scala/com/nvidia/spark/rapids/shims/v2/OrcShims.scala new file mode 100644 index 00000000000..dcac01eefe9 --- /dev/null +++ b/sql-plugin/src/main/301until320-noncdh/scala/com/nvidia/spark/rapids/shims/v2/OrcShims.scala @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.nvidia.spark.rapids.shims.v2 + +import com.nvidia.spark.rapids.RapidsPluginImplicits._ +import org.apache.orc.Reader + +object OrcShims extends OrcShims301until320Base { + + // the ORC Reader in non CDH Spark is closeable + def withReader[T <: AutoCloseable, V](r: T)(block: T => V): V = { + try { + block(r) + } finally { + r.safeClose() + } + } + + // the ORC Reader in non CDH Spark is closeable + def closeReader(reader: Reader): Unit = { + if (reader != null) { + reader.close() + } + } +} diff --git a/sql-plugin/src/main/301until330-all/scala/com/nvidia/spark/rapids/shims/v2/RapidsErrorUtils.scala b/sql-plugin/src/main/301until330-all/scala/com/nvidia/spark/rapids/shims/v2/RapidsErrorUtils.scala index 8105b2349df..650758258f6 100644 --- a/sql-plugin/src/main/301until330-all/scala/com/nvidia/spark/rapids/shims/v2/RapidsErrorUtils.scala +++ b/sql-plugin/src/main/301until330-all/scala/com/nvidia/spark/rapids/shims/v2/RapidsErrorUtils.scala @@ -23,4 +23,11 @@ object RapidsErrorUtils { throw new ArrayIndexOutOfBoundsException(s"index $index is beyond the max index allowed " + s"${numElements - 1}") } + + def throwInvalidElementAtIndexError( + elementKey: String, isElementAtFunction: Boolean = false): ColumnVector = { + // For now, the default argument is false. The caller sets the correct value accordingly. + throw new NoSuchElementException(s"Key: ${elementKey} " + + s"does not exist in any one of the rows in the map column") + } } diff --git a/sql-plugin/src/main/311cdh/scala/com/nvidia/spark/rapids/shims/v2/OrcShims.scala b/sql-plugin/src/main/311cdh/scala/com/nvidia/spark/rapids/shims/v2/OrcShims.scala new file mode 100644 index 00000000000..ddc4534cb39 --- /dev/null +++ b/sql-plugin/src/main/311cdh/scala/com/nvidia/spark/rapids/shims/v2/OrcShims.scala @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.nvidia.spark.rapids.shims.v2 + +import org.apache.orc.Reader + +object OrcShims extends OrcShims301until320Base { + + // ORC Reader of the 311cdh Spark has no close method. + // The resource is closed internally. + def withReader[V](r: Reader)(block: Reader => V): V = { + block(r) + } + + // empty + def closeReader(reader: Reader): Unit = { + } + +} diff --git a/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/v2/OrcShims.scala b/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/v2/OrcShims.scala new file mode 100644 index 00000000000..7a0fed4abc9 --- /dev/null +++ b/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/v2/OrcShims.scala @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
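The OrcShims variants above differ only in how the ORC `Reader` is released: the non-CDH and 320+ shims close it, while the 311cdh shim leaves it alone because that Reader is not closeable by the caller. A short sketch of how shared code is expected to sit on top of them, going only through `OrcShims.withReader` instead of calling `Reader.close()` directly; the helper name `readFileSchema` is illustrative, not from the plugin.

import com.nvidia.spark.rapids.shims.v2.OrcShims
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.orc.OrcFile

object OrcShimsUsageSketch {
  // Reads the file schema through the shim; the shim decides whether the
  // reader needs to be closed afterwards, so this call site compiles and
  // behaves the same against every supported Spark/ORC combination.
  def readFileSchema(conf: Configuration, filePath: Path): String = {
    val fs = filePath.getFileSystem(conf)
    val opts = OrcFile.readerOptions(conf).filesystem(fs)
    OrcShims.withReader(OrcFile.createReader(filePath, opts)) { reader =>
      reader.getSchema.toString
    }
  }
}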
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.nvidia.spark.rapids.shims.v2 + +import scala.collection.mutable.ArrayBuffer + +import com.nvidia.spark.rapids.OrcOutputStripe +import com.nvidia.spark.rapids.RapidsPluginImplicits._ +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.hive.common.io.DiskRangeList +import org.apache.orc.{CompressionCodec, CompressionKind, DataReader, OrcConf, OrcFile, OrcProto, PhysicalWriter, Reader, StripeInformation} +import org.apache.orc.impl.{BufferChunk, BufferChunkList, DataReaderProperties, InStream, OrcCodecPool, OutStream, ReaderImpl, SchemaEvolution} +import org.apache.orc.impl.RecordReaderImpl.SargApplier +import org.apache.orc.impl.reader.StripePlanner +import org.apache.orc.impl.writer.StreamOptions + +// 320+ ORC shims +object OrcShims { + + // the ORC Reader in non-CDH Spark is closeable + def withReader[T <: Reader, V](r: T)(block: T => V): V = { + try { + block(r) + } finally { + r.safeClose() + } + } + + // the ORC Reader in non-CDH Spark is closeable + def closeReader(reader: Reader): Unit = { + if(reader != null) { + reader.close() + } + } + + // read data to buffer + def readFileData(dataReader: DataReader, inputDataRanges: DiskRangeList): DiskRangeList = { + + // convert DiskRangeList to BufferChunkList + val chuckList = new BufferChunkList + var curr = inputDataRanges + while (curr != null) { + chuckList.add(new BufferChunk(curr.getOffset, curr.getLength)) + curr = curr.next + } + + // BufferChunk is subclass of DiskRangeList + dataReader.readFileData(chuckList, false).get() + } + + // create reader properties builder + def newDataReaderPropertiesBuilder(compressionSize: Int, + compressionKind: CompressionKind, typeCount: Int): DataReaderProperties.Builder = { + val compression = new InStream.StreamOptions() + .withBufferSize(compressionSize).withCodec(OrcCodecPool.getCodec(compressionKind)) + DataReaderProperties.builder().withCompression(compression) + } + + // create ORC out stream + def newOrcOutStream(name: String, bufferSize: Int, codec: CompressionCodec, + receiver: PhysicalWriter.OutputReceiver): OutStream = { + val options = new StreamOptions(bufferSize).withCodec(codec, codec.getDefaultOptions) + new OutStream(name, options, receiver) + } + + // filter stripes by pushing down filter + def filterStripes( + stripes: Seq[StripeInformation], + conf: Configuration, + orcReader: Reader, + dataReader: DataReader, + gen: (StripeInformation, OrcProto.StripeFooter, Array[Int], Array[Int]) => OrcOutputStripe, + evolution: SchemaEvolution, + sargApp: SargApplier, + sargColumns: Array[Boolean], + ignoreNonUtf8BloomFilter: Boolean, + writerVersion: OrcFile.WriterVersion, + fileIncluded: Array[Boolean], + columnMapping: Array[Int], + idMapping: Array[Int]): ArrayBuffer[OrcOutputStripe] = { + + val orcReaderImpl = orcReader.asInstanceOf[ReaderImpl] + val maxDiskRangeChunkLimit = OrcConf.ORC_MAX_DISK_RANGE_CHUNK_LIMIT.getInt(conf) + val planner = new StripePlanner(evolution.getFileSchema, orcReaderImpl.getEncryption(), + dataReader, writerVersion, ignoreNonUtf8BloomFilter, maxDiskRangeChunkLimit) + + val result = new 
ArrayBuffer[OrcOutputStripe](stripes.length) + stripes.foreach { stripe => + val stripeFooter = dataReader.readStripeFooter(stripe) + val needStripe = if (sargApp != null) { + val orcIndex = planner.parseStripe(stripe, fileIncluded).readRowIndex(sargColumns, null) + val rowGroups = sargApp.pickRowGroups(stripe, orcIndex.getRowGroupIndex, + orcIndex.getBloomFilterKinds, stripeFooter.getColumnsList, orcIndex.getBloomFilterIndex, + true) + rowGroups != SargApplier.READ_NO_RGS + } else { + true + } + + if (needStripe) { + result.append(gen(stripe, stripeFooter, columnMapping, idMapping)) + } + } + result + + } +} diff --git a/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/v2/Spark320PlusShims.scala b/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/v2/Spark320PlusShims.scala index 606b2f8be00..662008e9fab 100644 --- a/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/v2/Spark320PlusShims.scala +++ b/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/v2/Spark320PlusShims.scala @@ -558,7 +558,7 @@ trait Spark320PlusShims extends SparkShims with RebaseShims with Logging { // partition filters and data filters are not run on the GPU override val childExprs: Seq[ExprMeta[_]] = Seq.empty - override def tagPlanForGpu(): Unit = GpuFileSourceScanExec.tagSupport(this) + override def tagPlanForGpu(): Unit = tagFileSourceScanExec(this) override def convertToCpu(): SparkPlan = { wrapped.copy(partitionFilters = partitionFilters) @@ -1051,4 +1051,8 @@ trait Spark320PlusShims extends SparkShims with RebaseShims with Logging { } override def supportsColumnarAdaptivePlans: Boolean = true + + def tagFileSourceScanExec(meta: SparkPlanMeta[FileSourceScanExec]): Unit = { + GpuFileSourceScanExec.tagSupport(meta) + } } diff --git a/sql-plugin/src/main/330+/scala/com/nvidia/spark/rapids/shims/v2/RapidsErrorUtils.scala b/sql-plugin/src/main/330+/scala/com/nvidia/spark/rapids/shims/v2/RapidsErrorUtils.scala index de67b5e5cf7..99b943a0e71 100644 --- a/sql-plugin/src/main/330+/scala/com/nvidia/spark/rapids/shims/v2/RapidsErrorUtils.scala +++ b/sql-plugin/src/main/330+/scala/com/nvidia/spark/rapids/shims/v2/RapidsErrorUtils.scala @@ -24,4 +24,10 @@ object RapidsErrorUtils { def throwArrayIndexOutOfBoundsException(index: Int, numElements: Int): ColumnVector = { throw QueryExecutionErrors.invalidArrayIndexError(index, numElements) } + + def throwInvalidElementAtIndexError( + elementKey: String, isElementAtFunction: Boolean = false): ColumnVector = { + // For now, the default argument is false. The caller sets the correct value accordingly. 
+ throw QueryExecutionErrors.mapKeyNotExistError(elementKey, isElementAtFunction) + } } diff --git a/sql-plugin/src/main/330+/scala/com/nvidia/spark/rapids/shims/v2/Spark33XShims.scala b/sql-plugin/src/main/330+/scala/com/nvidia/spark/rapids/shims/v2/Spark33XShims.scala index bad7c3d8e4d..0294f602a7d 100644 --- a/sql-plugin/src/main/330+/scala/com/nvidia/spark/rapids/shims/v2/Spark33XShims.scala +++ b/sql-plugin/src/main/330+/scala/com/nvidia/spark/rapids/shims/v2/Spark33XShims.scala @@ -22,10 +22,10 @@ import org.apache.parquet.schema.MessageType import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, MetadataAttribute} import org.apache.spark.sql.catalyst.json.rapids.shims.v2.Spark33XFileOptionsShims import org.apache.spark.sql.connector.read.{Scan, SupportsRuntimeFiltering} -import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.datasources.{DataSourceUtils, FilePartition, FileScanRDD, PartitionedFile} import org.apache.spark.sql.execution.datasources.parquet.ParquetFilters import org.apache.spark.sql.execution.datasources.v2.csv.CSVScan @@ -143,6 +143,16 @@ trait Spark33XShims extends Spark33XFileOptionsShims { conf.maxReadBatchSizeBytes) }) ).map(r => (r.getClassFor.asSubclass(classOf[Scan]), r)).toMap + + override def tagFileSourceScanExec(meta: SparkPlanMeta[FileSourceScanExec]): Unit = { + if (meta.wrapped.expressions.exists(expr => expr match { + case MetadataAttribute(expr) => true + case _ => false + })) { + meta.willNotWorkOnGpu("hidden metadata columns are not supported on GPU") + } + super.tagFileSourceScanExec(meta) + } } // Fallback to the default definition of `deterministic` diff --git a/sql-plugin/src/main/scala/ai/rapids/cudf/HostConcatResultUtil.scala b/sql-plugin/src/main/scala/ai/rapids/cudf/HostConcatResultUtil.scala new file mode 100644 index 00000000000..30d7289c902 --- /dev/null +++ b/sql-plugin/src/main/scala/ai/rapids/cudf/HostConcatResultUtil.scala @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ai.rapids.cudf + +import ai.rapids.cudf.JCudfSerialization.HostConcatResult +import com.nvidia.spark.rapids.{Arm, GpuColumnVectorFromBuffer} + +import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.vectorized.ColumnarBatch + +object HostConcatResultUtil extends Arm { + /** + * Create a rows-only `HostConcatResult`. 
+ */ + def rowsOnlyHostConcatResult(numRows: Int): HostConcatResult = { + new HostConcatResult( + new JCudfSerialization.SerializedTableHeader( + Array.empty, numRows, 0L), + HostMemoryBuffer.allocate(0, false)) + } + + /** + * Given a `HostConcatResult` and a SparkSchema produce a `ColumnarBatch`, + * handling the rows-only case. + * + * @note This function does not consume the `HostConcatResult`, and + * callers are responsible for closing the resulting `ColumnarBatch` + */ + def getColumnarBatch( + hostConcatResult: HostConcatResult, + sparkSchema: Array[DataType]): ColumnarBatch = { + if (hostConcatResult.getTableHeader.getNumColumns == 0) { + // We expect the caller to have acquired the GPU unconditionally before calling + // `getColumnarBatch`, as a downstream exec may need the GPU, and the assumption is + // that it is acquired in the coalesce code. + new ColumnarBatch(Array.empty, hostConcatResult.getTableHeader.getNumRows) + } else { + withResource(hostConcatResult.toContiguousTable) { ct => + GpuColumnVectorFromBuffer.from(ct, sparkSchema) + } + } + } +} diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCoalesceBatches.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCoalesceBatches.scala index d4333f7bc15..26b1c0c4116 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCoalesceBatches.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCoalesceBatches.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -46,7 +46,20 @@ object ConcatAndConsumeAll { * @return a single batch with all of them concated together. */ def buildNonEmptyBatch(arrayOfBatches: Array[ColumnarBatch], - schema: StructType): ColumnarBatch = { + schema: StructType): ColumnarBatch = + buildNonEmptyBatchFromTypes( + arrayOfBatches, GpuColumnVector.extractTypes(schema)) + + /** + * Build a single batch from the batches collected so far. If array is empty this will likely + * blow up. + * @param arrayOfBatches the batches to concat. This will be consumed and you do not need to + * close any of the batches after this is called. + * @param dataTypes the output types. + * @return a single batch with all of them concated together. 
+ */ + def buildNonEmptyBatchFromTypes(arrayOfBatches: Array[ColumnarBatch], + dataTypes: Array[DataType]): ColumnarBatch = { if (arrayOfBatches.length == 1) { arrayOfBatches(0) } else { @@ -54,7 +67,7 @@ object ConcatAndConsumeAll { try { val combined = Table.concatenate(tables: _*) try { - GpuColumnVector.from(combined, GpuColumnVector.extractTypes(schema)) + GpuColumnVector.from(combined, dataTypes) } finally { combined.close() } @@ -410,9 +423,8 @@ abstract class AbstractGpuCoalesceIterator( } class GpuCoalesceIterator(iter: Iterator[ColumnarBatch], - schema: StructType, + sparkTypes: Array[DataType], goal: CoalesceSizeGoal, - maxDecompressBatchMemory: Long, numInputRows: GpuMetric, numInputBatches: GpuMetric, numOutputRows: GpuMetric, @@ -422,8 +434,7 @@ class GpuCoalesceIterator(iter: Iterator[ColumnarBatch], opTime: GpuMetric, peakDevMemory: GpuMetric, spillCallback: SpillCallback, - opName: String, - codecConfigs: TableCompressionCodecConfig) + opName: String) extends AbstractGpuCoalesceIterator(iter, goal, numInputRows, @@ -435,8 +446,7 @@ class GpuCoalesceIterator(iter: Iterator[ColumnarBatch], opTime, opName) with Arm { - private val sparkTypes: Array[DataType] = GpuColumnVector.extractTypes(schema) - private val batches: ArrayBuffer[SpillableColumnarBatch] = ArrayBuffer.empty + protected val batches: ArrayBuffer[SpillableColumnarBatch] = ArrayBuffer.empty private var maxDeviceMemory: Long = 0 override def initNewBatch(batch: ColumnarBatch): Unit = { @@ -448,10 +458,85 @@ class GpuCoalesceIterator(iter: Iterator[ColumnarBatch], batches.append(SpillableColumnarBatch(batch, SpillPriorities.ACTIVE_BATCHING_PRIORITY, spillCallback)) + protected def popAll(): Array[ColumnarBatch] = { + closeOnExcept(batches.toArray.safeMap(_.getColumnarBatch())) { wip => + batches.safeClose() + batches.clear() + wip + } + } + + override def concatAllAndPutOnGPU(): ColumnarBatch = { + val ret = ConcatAndConsumeAll.buildNonEmptyBatchFromTypes(popAll(), sparkTypes) + // sum of current batches and concatenating batches. Approximately sizeof(ret * 2). + maxDeviceMemory = GpuColumnVector.getTotalDeviceMemoryUsed(ret) * 2 + ret + } + + override def cleanupConcatIsDone(): Unit = { + peakDevMemory.set(maxDeviceMemory) + batches.clear() + } + + private var onDeck: Option[SpillableColumnarBatch] = None + + override protected def hasOnDeck: Boolean = onDeck.isDefined + + override protected def saveOnDeck(batch: ColumnarBatch): Unit = { + assert(onDeck.isEmpty) + onDeck = Some(SpillableColumnarBatch(batch, SpillPriorities.ACTIVE_ON_DECK_PRIORITY, + spillCallback)) + } + + override protected def clearOnDeck(): Unit = { + onDeck.foreach(_.close()) + onDeck = None + } + + override protected def popOnDeck(): ColumnarBatch = { + val ret = onDeck.get.getColumnarBatch() + clearOnDeck() + ret + } +} + +/** + * Compression codec-aware `GpuCoalesceIterator` subclass which should be used in cases + * where the RAPIDS Shuffle Manager could be configured, as batches to be coalesced + * may be compressed. 
+ */ +class GpuCompressionAwareCoalesceIterator( + iter: Iterator[ColumnarBatch], + sparkTypes: Array[DataType], + goal: CoalesceSizeGoal, + maxDecompressBatchMemory: Long, + numInputRows: GpuMetric, + numInputBatches: GpuMetric, + numOutputRows: GpuMetric, + numOutputBatches: GpuMetric, + collectTime: GpuMetric, + concatTime: GpuMetric, + opTime: GpuMetric, + peakDevMemory: GpuMetric, + spillCallback: SpillCallback, + opName: String, + codecConfigs: TableCompressionCodecConfig) + extends GpuCoalesceIterator( + iter, sparkTypes, goal, + numInputRows = numInputRows, + numInputBatches = numInputBatches, + numOutputRows = numOutputRows, + numOutputBatches = numOutputBatches, + collectTime = collectTime, + concatTime = concatTime, + opTime = opTime, + peakDevMemory = peakDevMemory, + spillCallback, opName) { + private[this] var codec: TableCompressionCodec = _ - private[this] def popAllDecompressed(): Array[ColumnarBatch] = { - closeOnExcept(batches.map(_.getColumnarBatch())) { wip => + override protected def popAll(): Array[ColumnarBatch] = { + closeOnExcept(batches.toArray.safeMap(_.getColumnarBatch())) { wip => batches.safeClose() batches.clear() @@ -487,42 +572,9 @@ class GpuCoalesceIterator(iter: Iterator[ColumnarBatch], } } } - wip.toArray + wip } } - - override def concatAllAndPutOnGPU(): ColumnarBatch = { - val ret = ConcatAndConsumeAll.buildNonEmptyBatch(popAllDecompressed(), schema) - // sum of current batches and concatenating batches. Approximately sizeof(ret * 2). - maxDeviceMemory = GpuColumnVector.getTotalDeviceMemoryUsed(ret) * 2 - ret - } - - override def cleanupConcatIsDone(): Unit = { - peakDevMemory.set(maxDeviceMemory) - batches.clear() - } - - private var onDeck: Option[SpillableColumnarBatch] = None - - override protected def hasOnDeck: Boolean = onDeck.isDefined - - override protected def saveOnDeck(batch: ColumnarBatch): Unit = { - assert(onDeck.isEmpty) - onDeck = Some(SpillableColumnarBatch(batch, SpillPriorities.ACTIVE_ON_DECK_PRIORITY, - spillCallback)) - } - - override protected def clearOnDeck(): Unit = { - onDeck.foreach(_.close()) - onDeck = None - } - - override protected def popOnDeck(): ColumnarBatch = { - val ret = onDeck.get.getColumnarBatch() - clearOnDeck() - ret - } } case class GpuCoalesceBatches(child: SparkPlan, goal: CoalesceGoal) @@ -579,6 +631,7 @@ case class GpuCoalesceBatches(child: SparkPlan, goal: CoalesceGoal) // cache in local vars to avoid serializing the plan val outputSchema = schema + val dataTypes = GpuColumnVector.extractTypes(outputSchema) val decompressMemoryTarget = maxDecompressBatchMemory val batches = child.executeColumnar() @@ -593,7 +646,8 @@ case class GpuCoalesceBatches(child: SparkPlan, goal: CoalesceGoal) goal match { case sizeGoal: CoalesceSizeGoal => batches.mapPartitions { iter => - new GpuCoalesceIterator(iter, outputSchema, sizeGoal, decompressMemoryTarget, + new GpuCompressionAwareCoalesceIterator( + iter, dataTypes, sizeGoal, decompressMemoryTarget, numInputRows, numInputBatches, numOutputRows, numOutputBatches, NoopMetric, concatTime, opTime, peakDevMemory, callback, "GpuCoalesceBatches", codecConfigs) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuMultiFileReader.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuMultiFileReader.scala index ffba671dc2d..0bffd6cd3cb 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuMultiFileReader.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuMultiFileReader.scala @@ -26,7 +26,6 @@ import 
scala.collection.mutable.{ArrayBuffer, LinkedHashMap, Queue} import scala.math.max import ai.rapids.cudf.{ColumnVector, HostMemoryBuffer, NvtxColor, NvtxRange, Table} -import com.google.common.util.concurrent.ThreadFactoryBuilder import com.nvidia.spark.rapids.GpuMetric.{NUM_OUTPUT_BATCHES, PEAK_DEVICE_MEMORY, SEMAPHORE_WAIT_TIME} import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScanBase.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScanBase.scala index e6063853eb2..6493d2cd3a7 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScanBase.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScanBase.scala @@ -36,6 +36,7 @@ import com.google.protobuf.CodedOutputStream import com.nvidia.spark.rapids.GpuMetric._ import com.nvidia.spark.rapids.RapidsPluginImplicits._ import com.nvidia.spark.rapids.SchemaUtils._ +import com.nvidia.spark.rapids.shims.v2.OrcShims import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.hive.common.io.DiskRangeList @@ -55,10 +56,10 @@ import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, Par import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.execution.datasources.PartitionedFile import org.apache.spark.sql.execution.datasources.orc.OrcUtils +import org.apache.spark.sql.execution.datasources.rapids.OrcFiltersWrapper import org.apache.spark.sql.execution.datasources.v2.{EmptyPartitionReader, FilePartitionReaderFactory} import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.rapids.OrcFilters import org.apache.spark.sql.rapids.execution.TrampolineUtil import org.apache.spark.sql.sources.Filter import org.apache.spark.sql.types.{ArrayType, DataType, DecimalType, MapType, StructType} @@ -319,13 +320,13 @@ trait OrcCommonFunctions extends OrcCodecWritingHelper { withResource(OrcTools.buildDataReader(ctx)) { dataReader => val start = System.nanoTime() - val bufferChunks = dataReader.readFileData(inputDataRanges, 0, false) + val bufferChunks = OrcShims.readFileData(dataReader, inputDataRanges) val mid = System.nanoTime() var current = bufferChunks while (current != null) { out.write(current.getData) if (dataReader.isTrackingDiskRanges && current.isInstanceOf[BufferChunk]) { - dataReader.releaseBuffer(current.asInstanceOf[BufferChunk].getChunk) + dataReader.releaseBuffer(current.getData) } current = current.next } @@ -740,17 +741,18 @@ private object OrcTools extends Arm { } val maxDiskRangeChunkLimit = OrcConf.ORC_MAX_DISK_RANGE_CHUNK_LIMIT.getInt(conf) val file = filePath.getFileSystem(conf).open(filePath) + + val typeCount = org.apache.orc.OrcUtils.getOrcTypes(fileSchema).size //noinspection ScalaDeprecation - RecordReaderUtils.createDefaultDataReader(DataReaderProperties.builder() - .withBufferSize(compressionSize) - .withCompression(compressionKind) - .withFileSystem(fs) - .withPath(filePath) - .withFile(file) // explicitly specify the FSDataInputStream - .withTypeCount(org.apache.orc.OrcUtils.getOrcTypes(fileSchema).size) - .withZeroCopy(zeroCopy) - .withMaxDiskRangeChunkLimit(maxDiskRangeChunkLimit) - .build()) + val reader = RecordReaderUtils.createDefaultDataReader( + OrcShims.newDataReaderPropertiesBuilder(compressionSize, compressionKind, typeCount) + .withFileSystem(fs) + .withPath(filePath) + 
.withZeroCopy(zeroCopy) + .withMaxDiskRangeChunkLimit(maxDiskRangeChunkLimit) + .build()) + reader.open() // 311cdh needs to initialize the internal FSDataInputStream file variable. + reader } } @@ -783,8 +785,8 @@ private case class GpuOrcFileFilterHandler( val orcFileReaderOpts = OrcFile.readerOptions(conf).filesystem(fs) // After getting the necessary information from ORC reader, we must close the ORC reader - withResource(OrcFile.createReader(filePath, orcFileReaderOpts)) { orcReader => - val resultedColPruneInfo = requestedColumnIds(isCaseSensitive, dataSchema, + OrcShims.withReader(OrcFile.createReader(filePath, orcFileReaderOpts)) { orcReader => + val resultedColPruneInfo = requestedColumnIds(isCaseSensitive, dataSchema, readDataSchema, orcReader) if (resultedColPruneInfo.isEmpty) { // Be careful when the OrcPartitionReaderContext is null, we should change @@ -822,7 +824,7 @@ private case class GpuOrcFileFilterHandler( val readerOpts = OrcInputFormat.buildOptions( conf, orcReader, partFile.start, partFile.length) // create the search argument if we have pushed filters - OrcFilters.createFilter(fullSchema, pushedFilters).foreach { f => + OrcFiltersWrapper.createFilter(fullSchema, pushedFilters).foreach { f => readerOpts.searchArgument(f, fullSchema.fieldNames) } readerOpts @@ -882,7 +884,7 @@ private case class GpuOrcFileFilterHandler( if (matchedOrcFields.size > 1) { // Need to fail if there is ambiguity, i.e. more than one field is matched. val matchedOrcFieldsString = matchedOrcFields.mkString("[", ", ", "]") - reader.close() + OrcShims.closeReader(reader) throw new RuntimeException(s"""Found duplicate field(s) "$requiredFieldName": """ + s"$matchedOrcFieldsString in case-insensitive mode") } else { @@ -1088,29 +1090,10 @@ private case class GpuOrcFileFilterHandler( val fileIncluded = calcOrcFileIncluded(evolution) val (columnMapping, idMapping) = columnRemap(fileIncluded, evolution.getFileSchema, updatedReadSchema, isCaseSensitive) - val result = new ArrayBuffer[OrcOutputStripe](stripes.length) - stripes.foreach { stripe => - val stripeFooter = dataReader.readStripeFooter(stripe) - val needStripe = if (sargApp != null) { - // An ORC schema is a single struct type describing the schema fields - val orcFileSchema = evolution.getFileType(0) - val orcIndex = dataReader.readRowIndex(stripe, orcFileSchema, stripeFooter, - ignoreNonUtf8BloomFilter, fileIncluded, null, sargColumns, - writerVersion, null, null) - val rowGroups = sargApp.pickRowGroups(stripe, orcIndex.getRowGroupIndex, - orcIndex.getBloomFilterKinds, stripeFooter.getColumnsList, orcIndex.getBloomFilterIndex, - true) - rowGroups != SargApplier.READ_NO_RGS - } else { - true - } - - if (needStripe) { - result.append(buildOutputStripe(stripe, stripeFooter, columnMapping, idMapping)) - } - } - - result + OrcShims.filterStripes(stripes, conf, orcReader, dataReader, + buildOutputStripe, evolution, + sargApp, sargColumns, ignoreNonUtf8BloomFilter, + writerVersion, fileIncluded, columnMapping, idMapping) } /** @@ -1552,8 +1535,8 @@ trait OrcCodecWritingHelper extends Arm { // note that this buffer is just for writing meta-data OrcConf.BUFFER_SIZE.getDefaultValue.asInstanceOf[Int] } - withResource(new OutStream(getClass.getSimpleName, orcBufferSize, codec, - outReceiver)) { codecStream => + withResource(OrcShims.newOrcOutStream( + getClass.getSimpleName, orcBufferSize, codec, outReceiver)) { codecStream => val protoWriter = CodedOutputStream.newInstance(codecStream) block(outChannel, protoWriter, codecStream) } diff --git 
a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index 16280a47179..82333fa3721 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -492,11 +492,6 @@ object GpuOverrides extends Logging { listeners.clear() } - def canRegexpBeTreatedLikeARegularString(strLit: UTF8String): Boolean = { - val s = strLit.toString - !regexList.exists(pattern => s.contains(pattern)) - } - private def convertPartToGpuIfPossible(part: Partitioning, conf: RapidsConf): Partitioning = { part match { case _: GpuPartitioning => part @@ -3352,7 +3347,23 @@ object GpuOverrides extends Logging { TypeSig.DATE)), Some(RepeatingParamCheck("step", TypeSig.integral, TypeSig.integral + TypeSig.CALENDAR))), (a, conf, p, r) => new GpuSequenceMeta(a, conf, p, r) - ) + ), + expr[BitLength]( + "The bit length of string data", + ExprChecks.unaryProject( + TypeSig.INT, TypeSig.INT, + TypeSig.STRING, TypeSig.STRING + TypeSig.BINARY), + (a, conf, p, r) => new UnaryExprMeta[BitLength](a, conf, p, r) { + override def convertToGpu(child: Expression): GpuExpression = GpuBitLength(child) + }), + expr[OctetLength]( + "The byte length of string data", + ExprChecks.unaryProject( + TypeSig.INT, TypeSig.INT, + TypeSig.STRING, TypeSig.STRING + TypeSig.BINARY), + (a, conf, p, r) => new UnaryExprMeta[OctetLength](a, conf, p, r) { + override def convertToGpu(child: Expression): GpuExpression = GpuOctetLength(child) + }) ).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap // Shim expressions should be last to allow overrides with shim-specific versions diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffleCoalesceExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffleCoalesceExec.scala index ba42c2d8f3e..c85d6fe1a60 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffleCoalesceExec.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffleCoalesceExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
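The two rules added to GpuOverrides above map Spark's `BitLength` and `OctetLength` onto `GpuBitLength` and `GpuOctetLength` for string input (binary input stays on the CPU). A small driver-side sketch of the semantics those GPU expressions must reproduce; whether it actually runs on the GPU depends on the plugin being enabled, which is outside this snippet.

import org.apache.spark.sql.SparkSession

// Sketch of the CPU semantics GpuBitLength / GpuOctetLength must match for
// strings: octet_length counts UTF-8 encoded bytes and bit_length is 8x that.
object LengthExprSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    import spark.implicits._

    Seq("a", "abc", "é").toDF("s")
      .selectExpr("s", "octet_length(s)", "bit_length(s)")
      .show()
    // "é" encodes to 2 bytes in UTF-8, so octet_length is 2 and bit_length is 16.

    spark.stop()
  }
}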
@@ -18,8 +18,8 @@ package com.nvidia.spark.rapids import java.util -import ai.rapids.cudf.{HostMemoryBuffer, JCudfSerialization, NvtxColor, NvtxRange} -import ai.rapids.cudf.JCudfSerialization.SerializedTableHeader +import ai.rapids.cudf.{HostConcatResultUtil, HostMemoryBuffer, JCudfSerialization, NvtxColor, NvtxRange} +import ai.rapids.cudf.JCudfSerialization.{HostConcatResult, SerializedTableHeader} import com.nvidia.spark.rapids.shims.v2.ShimUnaryExecNode import org.apache.spark.TaskContext @@ -61,10 +61,12 @@ case class GpuShuffleCoalesceExec(child: SparkPlan, targetBatchByteSize: Long) override def doExecuteColumnar(): RDD[ColumnarBatch] = { val metricsMap = allMetrics val targetSize = targetBatchByteSize - val sparkSchema = GpuColumnVector.extractTypes(schema) + val dataTypes = GpuColumnVector.extractTypes(schema) child.executeColumnar().mapPartitions { iter => - new GpuShuffleCoalesceIterator(iter, targetSize, sparkSchema, metricsMap) + new GpuShuffleCoalesceIterator( + new HostShuffleCoalesceIterator(iter, targetSize, dataTypes, metricsMap), + dataTypes, metricsMap) } } } @@ -72,22 +74,18 @@ case class GpuShuffleCoalesceExec(child: SparkPlan, targetBatchByteSize: Long) /** * Iterator that coalesces columnar batches that are expected to only contain * [[SerializedTableColumn]]. The serialized tables within are collected up - * to the target batch size and then concatenated on the host before the data - * is transferred to the GPU. + * to the target batch size and then concatenated on the host before handing + * them to the caller on `.next()` */ -class GpuShuffleCoalesceIterator( +class HostShuffleCoalesceIterator( iter: Iterator[ColumnarBatch], targetBatchByteSize: Long, - sparkSchema: Array[DataType], + dataTypes: Array[DataType], metricsMap: Map[String, GpuMetric]) - extends Iterator[ColumnarBatch] with Arm with AutoCloseable { - private[this] val opTimeMetric = metricsMap(GpuMetric.OP_TIME) + extends Iterator[HostConcatResult] with Arm with AutoCloseable { + private[this] val concatTimeMetric = metricsMap(GpuMetric.CONCAT_TIME) private[this] val inputBatchesMetric = metricsMap(GpuMetric.NUM_INPUT_BATCHES) private[this] val inputRowsMetric = metricsMap(GpuMetric.NUM_INPUT_ROWS) - private[this] val outputBatchesMetric = metricsMap(GpuMetric.NUM_OUTPUT_BATCHES) - private[this] val outputRowsMetric = metricsMap(GpuMetric.NUM_OUTPUT_ROWS) - private[this] val concatTimeMetric = metricsMap(GpuMetric.CONCAT_TIME) - private[this] val semWaitTime = metricsMap(GpuMetric.SEMAPHORE_WAIT_TIME) private[this] val serializedTables = new util.ArrayDeque[SerializedTableColumn] private[this] var numTablesInBatch: Int = 0 private[this] var numRowsInBatch: Int = 0 @@ -95,21 +93,44 @@ class GpuShuffleCoalesceIterator( Option(TaskContext.get()).foreach(_.addTaskCompletionListener[Unit](_ => close())) - override def hasNext: Boolean = { - bufferNextBatch() - numTablesInBatch > 0 + override def close(): Unit = { + serializedTables.forEach(_.close()) + serializedTables.clear() } - override def next(): ColumnarBatch = { - if (!hasNext) { - throw new NoSuchElementException("No more columnar batches") + def concatenateTablesInHost(): HostConcatResult = { + val result = withResource(new MetricRange(concatTimeMetric)) { _ => + val firstHeader = serializedTables.peekFirst().header + if (firstHeader.getNumColumns == 0) { + (0 until numTablesInBatch).foreach(_ => serializedTables.removeFirst()) + HostConcatResultUtil.rowsOnlyHostConcatResult(numRowsInBatch) + } else { + val headers = new 
Array[SerializedTableHeader](numTablesInBatch) + withResource(new Array[HostMemoryBuffer](numTablesInBatch)) { buffers => + headers.indices.foreach { i => + val serializedTable = serializedTables.removeFirst() + headers(i) = serializedTable.header + buffers(i) = serializedTable.hostBuffer + } + JCudfSerialization.concatToHostBuffer(headers, buffers) + } + } } - concatenateBatch() - } - override def close(): Unit = { - serializedTables.forEach(_.close()) - serializedTables.clear() + // update the stats for the next batch in progress + numTablesInBatch = serializedTables.size + + batchByteSize = 0 + numRowsInBatch = 0 + if (numTablesInBatch > 0) { + require(numTablesInBatch == 1, + "should only track at most one buffer that is not in a batch") + val header = serializedTables.peekFirst().header + batchByteSize = header.getDataLen + numRowsInBatch = header.getNumRows + } + + result } private def bufferNextBatch(): Unit = { @@ -120,7 +141,7 @@ class GpuShuffleCoalesceIterator( inputBatchesMetric += 1 // don't bother tracking empty tables if (batch.numRows > 0) { - inputRowsMetric += batch.numRows + inputRowsMetric += batch.numRows() val tableColumn = batch.column(0).asInstanceOf[SerializedTableColumn] batchCanGrow = canAddToBatch(tableColumn.header) serializedTables.addLast(tableColumn) @@ -138,6 +159,18 @@ class GpuShuffleCoalesceIterator( } } + override def hasNext(): Boolean = { + bufferNextBatch() + numTablesInBatch > 0 + } + + override def next(): HostConcatResult = { + if (!hasNext()) { + throw new NoSuchElementException("No more host batches to concatenate") + } + concatenateTablesInHost() + } + private def canAddToBatch(nextTable: SerializedTableHeader): Boolean = { if (batchByteSize + nextTable.getDataLen > targetBatchByteSize) { return false @@ -147,60 +180,41 @@ class GpuShuffleCoalesceIterator( } true } +} - private def concatenateBatch(): ColumnarBatch = { - val firstHeader = serializedTables.peekFirst().header - val batch = withResource(new MetricRange(concatTimeMetric)) { _ => - if (firstHeader.getNumColumns == 0) { - // acquire the GPU unconditionally for now in this case, as a downstream exec - // may need the GPU, and the assumption is that it is acquired in the coalesce - // code. - GpuSemaphore.acquireIfNecessary(TaskContext.get(), semWaitTime) - (0 until numTablesInBatch).foreach(_ => serializedTables.removeFirst()) - new ColumnarBatch(Array.empty, numRowsInBatch) - } else { - concatenateTablesBatch() - } - } +/** + * Iterator that coalesces columnar batches that are expected to only contain + * [[SerializedTableColumn]]. The serialized tables within are collected up + * to the target batch size and then concatenated on the host before the data + * is transferred to the GPU. 
+ */ +class GpuShuffleCoalesceIterator(iter: Iterator[HostConcatResult], + dataTypes: Array[DataType], + metricsMap: Map[String, GpuMetric]) + extends Iterator[ColumnarBatch] with Arm { + private[this] val semWaitTime = metricsMap(GpuMetric.SEMAPHORE_WAIT_TIME) + private[this] val opTimeMetric = metricsMap(GpuMetric.OP_TIME) + private[this] val outputBatchesMetric = metricsMap(GpuMetric.NUM_OUTPUT_BATCHES) + private[this] val outputRowsMetric = metricsMap(GpuMetric.NUM_OUTPUT_ROWS) - withResource(new MetricRange(opTimeMetric)) { _ => - outputBatchesMetric += 1 - outputRowsMetric += batch.numRows - - // update the stats for the next batch in progress - numTablesInBatch = serializedTables.size - batchByteSize = 0 - numRowsInBatch = 0 - if (numTablesInBatch > 0) { - require(numTablesInBatch == 1, - "should only track at most one buffer that is not in a batch") - val header = serializedTables.peekFirst().header - batchByteSize = header.getDataLen - numRowsInBatch = header.getNumRows - } + override def hasNext: Boolean = iter.hasNext - batch + override def next(): ColumnarBatch = { + if (!hasNext) { + throw new NoSuchElementException("No more columnar batches") } - } - - private def concatenateTablesBatch(): ColumnarBatch = { - val headers = new Array[SerializedTableHeader](numTablesInBatch) - withResource(new Array[HostMemoryBuffer](numTablesInBatch)) { buffers => - headers.indices.foreach { i => - val serializedTable = serializedTables.removeFirst() - headers(i) = serializedTable.header - buffers(i) = serializedTable.hostBuffer - } - - withResource(new NvtxRange("Concat+Load Batch", NvtxColor.YELLOW)) { _ => - withResource(JCudfSerialization.concatToHostBuffer(headers, buffers)) { hostConcatResult => - // about to start using the GPU in this task - GpuSemaphore.acquireIfNecessary(TaskContext.get(), semWaitTime) - withResource(new MetricRange(opTimeMetric)) { _ => - withResource(hostConcatResult.toContiguousTable) { contigTable => - GpuColumnVectorFromBuffer.from(contigTable, sparkSchema) - } - } + withResource(new NvtxRange("Concat+Load Batch", NvtxColor.YELLOW)) { _ => + withResource(iter.next()) { hostConcatResult => + // We acquire the GPU regardless of whether `hostConcatResult` + // is an empty batch or not, because the downstream tasks expect + // the `GpuShuffleCoalesceIterator` to acquire the semaphore and may + // generate GPU data from batches that are empty. 
+ GpuSemaphore.acquireIfNecessary(TaskContext.get(), semWaitTime) + withResource(new MetricRange(opTimeMetric)) { _ => + val batch = HostConcatResultUtil.getColumnarBatch(hostConcatResult, dataTypes) + outputBatchesMetric += 1 + outputRowsMetric += batch.numRows() + batch } } } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinExec.scala index 675ec01758e..8c33ede98bc 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinExec.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinExec.scala @@ -16,8 +16,11 @@ package com.nvidia.spark.rapids +import ai.rapids.cudf.{HostConcatResultUtil, NvtxColor, NvtxRange} +import ai.rapids.cudf.JCudfSerialization.HostConcatResult import com.nvidia.spark.rapids.shims.v2.{GpuHashPartitioning, GpuJoinUtils, ShimBinaryExecNode} +import org.apache.spark.TaskContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} @@ -97,7 +100,9 @@ case class GpuShuffledHashJoinExec( override val outputBatchesLevel: MetricsLevel = MODERATE_LEVEL override lazy val additionalMetrics: Map[String, GpuMetric] = Map( OP_TIME -> createNanoTimingMetric(MODERATE_LEVEL, DESCRIPTION_OP_TIME), + CONCAT_TIME -> createNanoTimingMetric(DEBUG_LEVEL, DESCRIPTION_CONCAT_TIME), BUILD_DATA_SIZE -> createSizeMetric(ESSENTIAL_LEVEL, DESCRIPTION_BUILD_DATA_SIZE), + PEAK_DEVICE_MEMORY -> createNanoTimingMetric(MODERATE_LEVEL, DESCRIPTION_PEAK_DEVICE_MEMORY), BUILD_TIME -> createNanoTimingMetric(ESSENTIAL_LEVEL, DESCRIPTION_BUILD_TIME), STREAM_TIME -> createNanoTimingMetric(DEBUG_LEVEL, DESCRIPTION_STREAM_TIME), JOIN_TIME -> createNanoTimingMetric(DEBUG_LEVEL, DESCRIPTION_JOIN_TIME), @@ -123,28 +128,39 @@ case class GpuShuffledHashJoinExec( val numOutputRows = gpuLongMetric(NUM_OUTPUT_ROWS) val numOutputBatches = gpuLongMetric(NUM_OUTPUT_BATCHES) val opTime = gpuLongMetric(OP_TIME) - val buildTime = gpuLongMetric(BUILD_TIME) val streamTime = gpuLongMetric(STREAM_TIME) val joinTime = gpuLongMetric(JOIN_TIME) val joinOutputRows = gpuLongMetric(JOIN_OUTPUT_ROWS) - val targetSize = RapidsConf.GPU_BATCH_SIZE_BYTES.get(conf) + val batchSizeBytes = RapidsConf.GPU_BATCH_SIZE_BYTES.get(conf) val spillCallback = GpuMetric.makeSpillCallback(allMetrics) - val localBuildOutput: Seq[Attribute] = buildPlan.output + val localBuildOutput = buildPlan.output + + // Create a map of metrics that can be handed down to shuffle and coalesce + // iterators, setting as noop certain metrics that the coalesce iterators + // normally update, but that in the case of the join they would produce + // the wrong statistics (since there are conflicts) + val coalesceMetricsMap = allMetrics + + (GpuMetric.NUM_INPUT_ROWS -> NoopMetric, + GpuMetric.NUM_INPUT_BATCHES -> NoopMetric, + GpuMetric.NUM_OUTPUT_BATCHES -> NoopMetric, + GpuMetric.NUM_OUTPUT_ROWS -> NoopMetric) streamedPlan.executeColumnar().zipPartitions(buildPlan.executeColumnar()) { (streamIter, buildIter) => { - val stIt = new CollectTimeIterator("shuffled join stream", streamIter, streamTime) - val startTime = System.nanoTime() + val (builtBatch, maybeBufferedStreamIter) = + GpuShuffledHashJoinExec.getBuiltBatchAndStreamIter( + batchSizeBytes, + localBuildOutput, + buildIter, + new CollectTimeIterator("shuffled join stream", streamIter, streamTime), + spillCallback, + coalesceMetricsMap) - 
withResource(ConcatAndConsumeAll.getSingleBatchWithVerification(buildIter, - localBuildOutput)) { builtBatch => + withResource(builtBatch) { _ => // doJoin will increment the reference counts as needed for the builtBatch - val delta = System.nanoTime() - startTime - buildTime += delta buildDataSize += GpuColumnVector.getTotalDeviceMemoryUsed(builtBatch) - - doJoin(builtBatch, stIt, targetSize, spillCallback, - numOutputRows, joinOutputRows, numOutputBatches, + doJoin(builtBatch, maybeBufferedStreamIter, + batchSizeBytes, spillCallback, numOutputRows, joinOutputRows, numOutputBatches, opTime, joinTime) } } @@ -155,3 +171,183 @@ case class GpuShuffledHashJoinExec( if (isSkewJoin) super.nodeName + "(skew=true)" else super.nodeName } } + +object GpuShuffledHashJoinExec extends Arm { + /** + * Helper iterator that wraps a BufferedIterator of AutoCloseable subclasses. + * This iterator also implements AutoCloseable, so it can be closed in case + * of exceptions. + * + * @param wrapped the buffered iterator + * @tparam T an AutoCloseable subclass + */ + class CloseableBufferedIterator[T <: AutoCloseable](wrapped: BufferedIterator[T]) + extends BufferedIterator[T] with AutoCloseable { + // register against task completion to close any leaked buffered items + Option(TaskContext.get()).foreach(_.addTaskCompletionListener[Unit](_ => close())) + + private[this] var isClosed = false + override def head: T = wrapped.head + override def headOption: Option[T] = wrapped.headOption + override def next: T = wrapped.next + override def hasNext: Boolean = wrapped.hasNext + override def close(): Unit = { + if (!isClosed) { + headOption.foreach(_.close()) + isClosed = true + } + } + } + + /** + * Gets a `ColumnarBatch` and stream Iterator[ColumnarBatch] pair by acquiring + * the GPU semaphore optimally in the scenario where the build side is relatively + * small (less than `hostTargetBatchSize`). + * + * In the optimal case, this function will load the build side on the host up to the + * goal configuration and if it fits entirely, allow the stream iterator + * to also pull to host its first batch. After the first stream batch is on the host, the + * stream iterator acquires the semaphore and then the build side is copied to the GPU. + * + * Prior to this we would get a build batch on the GPU, acquiring + * the semaphore in the process, and then begin pulling from the stream iterator, + * which could include IO (while holding onto the semaphore). + * + * The function handles the case where the build side goes above the configured batch + * goal, in which case it will concat on the host, grab the semaphore, and continue to + * pull the build iterator to build a bigger batch on the GPU. This is not optimized + * because we hold onto the semaphore during the entire time after realizing the goal + * has been hit. 
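+ *
+ * Roughly, and only as an illustration (the helper names below are made up and are
+ * not methods of this class):
+ * {{{
+ *   if (buildSideIsSerializedAndFits(hostTargetBatchSize)) {
+ *     streamIter.buffered.hasNext  // first stream batch is fetched on the host and
+ *                                  // the GPU semaphore is acquired on that path
+ *     moveBuildBatchToGpu()        // only now does the build side touch the GPU
+ *   } else {
+ *     concatOnHostThenKeepBuilding() // goal exceeded: grab the semaphore and keep building
+ *   }
+ * }}}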
+ * + * @param hostTargetBatchSize target batch size goal on the host + * @param buildOutput output attributes of the build plan + * @param buildIter build iterator + * @param streamIter stream iterator + * @param spillCallback metric updater in case downstream iterators spill + * @param coalesceMetricsMap metrics map with metrics to be used in downstream + * iterators + * @return a pair of `ColumnarBatch` and streamed iterator that can be + * used for the join + */ + def getBuiltBatchAndStreamIter( + hostTargetBatchSize: Long, + buildOutput: Seq[Attribute], + buildIter: Iterator[ColumnarBatch], + streamIter: Iterator[ColumnarBatch], + spillCallback: SpillCallback, + coalesceMetricsMap: Map[String, GpuMetric]): (ColumnarBatch, Iterator[ColumnarBatch]) = { + val semWait = coalesceMetricsMap(GpuMetric.SEMAPHORE_WAIT_TIME) + val buildTime = coalesceMetricsMap(GpuMetric.BUILD_TIME) + var bufferedBuildIterator: CloseableBufferedIterator[ColumnarBatch] = null + closeOnExcept(bufferedBuildIterator) { _ => + val startTime = System.nanoTime() + // find if the build side is non-empty, and if the first batch is + // a serialized batch. If neither condition is met, we fallback to the + // `getSingleBatchWithVerification` method. + val firstBatchIsSerialized = { + if (!buildIter.hasNext) { + false + } else { + bufferedBuildIterator = new CloseableBufferedIterator(buildIter.buffered) + val firstBatch = bufferedBuildIterator.head + if (firstBatch.numCols() != 1) { + false + } else { + firstBatch.column(0).isInstanceOf[SerializedTableColumn] + } + } + } + + if (!firstBatchIsSerialized) { + // In this scenario we are getting non host-side batches in the build side + // given the plan rules we expect this to be a single batch + val builtBatch = + ConcatAndConsumeAll.getSingleBatchWithVerification( + Option(bufferedBuildIterator).getOrElse(buildIter), buildOutput) + val delta = System.nanoTime() - startTime + buildTime += delta + (builtBatch, streamIter) + } else { + val dataTypes = buildOutput.map(_.dataType).toArray + val hostConcatIter = new HostShuffleCoalesceIterator(bufferedBuildIterator, + hostTargetBatchSize, dataTypes, coalesceMetricsMap) + withResource(hostConcatIter) { _ => + closeOnExcept(hostConcatIter.next()) { hostConcatResult => + if (!hostConcatIter.hasNext()) { + // add the time it took to fetch that first host-side build batch + buildTime += System.nanoTime() - startTime + // Optimal case, we drained the build iterator and we didn't have a prior + // so it was a single batch, and is entirely on the host. + // We peek at the stream iterator with `hasNext` on the buffered + // iterator, which will grab the semaphore when putting the first stream + // batch on the GPU, and then we bring the build batch to the GPU and return. 
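+ // (If the stream side turns out to be empty there is no stream batch to trigger the
+ // semaphore acquisition, so it is acquired explicitly in the else branch below.)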
+ val bufferedStreamIter = new CloseableBufferedIterator(streamIter.buffered)
+ closeOnExcept(bufferedStreamIter) { _ =>
+ withResource(new NvtxRange("first stream batch", NvtxColor.RED)) { _ =>
+ if (bufferedStreamIter.hasNext) {
+ bufferedStreamIter.head
+ } else {
+ GpuSemaphore.acquireIfNecessary(TaskContext.get(), semWait)
+ }
+ }
+ val buildBatch = getBuildBatchOptimized(hostConcatResult, buildOutput, buildTime)
+ (buildBatch, bufferedStreamIter)
+ }
+ } else {
+ val buildBatch = getBuildBatchFromUnfinished(
+ Seq(hostConcatResult).iterator ++ hostConcatIter,
+ buildOutput, spillCallback, coalesceMetricsMap)
+ buildTime += System.nanoTime() - startTime
+ (buildBatch, streamIter)
+ }
+ }
+ }
+ }
+ }
+
+ private def getBuildBatchFromUnfinished(
+ iterWithPrior: Iterator[HostConcatResult],
+ buildOutput: Seq[Attribute],
+ spillCallback: SpillCallback,
+ coalesceMetricsMap: Map[String, GpuMetric]): ColumnarBatch = {
+ // In the fallback case we build the same iterator chain that the Spark plan
+ // would have produced:
+ // GpuCoalesceIterator(GpuShuffleCoalesceIterator(shuffled build side))
+ // This allows us to make the shuffle batches spillable in case we have a large,
+ // build-side table, as `RequireSingleBatch` is virtually no limit, and we
+ // know we are now above `hostTargetBatchSize` (which is 2GB by default)
+ val dataTypes = buildOutput.map(_.dataType).toArray
+ val shuffleCoalesce = new GpuShuffleCoalesceIterator(
+ iterWithPrior,
+ dataTypes,
+ coalesceMetricsMap)
+ val res = ConcatAndConsumeAll.getSingleBatchWithVerification(
+ new GpuCoalesceIterator(shuffleCoalesce,
+ dataTypes,
+ RequireSingleBatch,
+ NoopMetric, NoopMetric, NoopMetric, NoopMetric, NoopMetric,
+ coalesceMetricsMap(GpuMetric.CONCAT_TIME),
+ coalesceMetricsMap(GpuMetric.OP_TIME),
+ coalesceMetricsMap(GpuMetric.PEAK_DEVICE_MEMORY),
+ spillCallback,
+ "build batch"),
+ buildOutput)
+ res
+ }
+
+ private def getBuildBatchOptimized(
+ hostConcatResult: HostConcatResult,
+ buildOutput: Seq[Attribute],
+ buildTime: GpuMetric): ColumnarBatch = {
+ val dataTypes = buildOutput.map(_.dataType).toArray
+ // we are on the GPU and our build batch is within `targetSizeBytes`.
+ // we can bring the build batch to the GPU now
+ withResource(hostConcatResult) { _ =>
+ buildTime.ns {
+ HostConcatResultUtil.getColumnarBatch(hostConcatResult, dataTypes)
+ }
+ }
+ }
+}
+
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuTransitionOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuTransitionOverrides.scala
index 9e9c496e8b4..54bf57e94fa 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuTransitionOverrides.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuTransitionOverrides.scala
@@ -237,6 +237,28 @@ class GpuTransitionOverrides extends Rule[SparkPlan] {
 p.withNewChildren(p.children.map(optimizeCoalesce))
 }
+ /**
+ * Removes `GpuCoalesceBatches(GpuShuffleCoalesceExec(build side))` for the build side
+ * of the shuffled hash join. The coalesce logic has been moved to the
+ * `GpuShuffledHashJoinExec` class, and is handled differently to prevent holding onto the
+ * GPU semaphore for stream IO.
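+ *
+ * For example, with the build side on the right (plan shapes are illustrative only):
+ *   before: GpuShuffledHashJoinExec(GpuShuffleCoalesceExec(stream), GpuCoalesceBatches(GpuShuffleCoalesceExec(build)))
+ *   after:  GpuShuffledHashJoinExec(GpuShuffleCoalesceExec(stream), build)
+ * so the host-side concatenation of the build side happens inside the join itself.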
+ */ + def shuffledHashJoinOptimizeShuffle(plan: SparkPlan): SparkPlan = plan match { + case x@GpuShuffledHashJoinExec( + _, _, _, buildSide, _, + left: GpuShuffleCoalesceExec, + GpuCoalesceBatches(GpuShuffleCoalesceExec(rc, _), _),_) if buildSide == GpuBuildRight => + x.withNewChildren( + Seq(shuffledHashJoinOptimizeShuffle(left), shuffledHashJoinOptimizeShuffle(rc))) + case x@GpuShuffledHashJoinExec( + _, _, _, buildSide, _, + GpuCoalesceBatches(GpuShuffleCoalesceExec(lc, _), _), + right: GpuShuffleCoalesceExec, _) if buildSide == GpuBuildLeft => + x.withNewChildren( + Seq(shuffledHashJoinOptimizeShuffle(lc), shuffledHashJoinOptimizeShuffle(right))) + case p => p.withNewChildren(p.children.map(shuffledHashJoinOptimizeShuffle)) + } + private def insertCoalesce(plans: Seq[SparkPlan], goals: Seq[CoalesceGoal], disableUntilInput: Boolean): Seq[SparkPlan] = { plans.zip(goals).map { @@ -550,6 +572,9 @@ class GpuTransitionOverrides extends Rule[SparkPlan] { } updatedPlan = fixupHostColumnarTransitions(updatedPlan) updatedPlan = optimizeCoalesce(updatedPlan) + if (rapidsConf.shuffledHashJoinOptimizeShuffle) { + updatedPlan = shuffledHashJoinOptimizeShuffle(updatedPlan) + } if (rapidsConf.exportColumnarRdd) { updatedPlan = detectAndTagFinalColumnarOutput(updatedPlan) } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 93ce85ce3e1..16ff8eb02f2 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -21,7 +21,9 @@ import java.util.Properties import java.util.concurrent.atomic.{AtomicBoolean, AtomicReference} import scala.collection.JavaConverters._ +import scala.collection.mutable.{Map => MutableMap} import scala.util.Try +import scala.util.matching.Regex import com.nvidia.spark.rapids.python.PythonWorkerSemaphore @@ -391,25 +393,33 @@ object ExecutionPlanCaptureCallback { executedPlan.expressions.exists(didFallBack(_, fallbackCpuClass)) } - private def containsExpression(exp: Expression, className: String): Boolean = exp.find { + private def containsExpression(exp: Expression, className: String, + regexMap: MutableMap[String, Regex] // regex memoization + ): Boolean = exp.find { case e if PlanUtils.getBaseNameFromClass(e.getClass.getName) == className => true - case e: ExecSubqueryExpression => containsPlan(e.plan, className) + case e: ExecSubqueryExpression => containsPlan(e.plan, className, regexMap) case _ => false }.nonEmpty - private def containsPlan(plan: SparkPlan, className: String): Boolean = plan.find { + private def containsPlan(plan: SparkPlan, className: String, + regexMap: MutableMap[String, Regex] = MutableMap.empty // regex memoization + ): Boolean = plan.find { case p if PlanUtils.sameClass(p, className) => true case p: AdaptiveSparkPlanExec => - containsPlan(p.executedPlan, className) + containsPlan(p.executedPlan, className, regexMap) case p: QueryStageExec => - containsPlan(p.plan, className) + containsPlan(p.plan, className, regexMap) case p: ReusedSubqueryExec => - containsPlan(p.child, className) + containsPlan(p.child, className, regexMap) case p: ReusedExchangeExec => - containsPlan(p.child, className) - case p => - p.expressions.exists(containsExpression(_, className)) + containsPlan(p.child, className, regexMap) + case p if p.expressions.exists(containsExpression(_, className, regexMap)) => + true + case p: SparkPlan => + regexMap.getOrElseUpdate(className, className.r) + 
.findFirstIn(p.simpleStringWithNodeId()) + .nonEmpty }.nonEmpty } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsBufferCatalog.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsBufferCatalog.scala index 222207c1151..f7730e84f99 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsBufferCatalog.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsBufferCatalog.scala @@ -299,4 +299,4 @@ object RapidsBufferCatalog extends Logging with Arm { /** Remove a buffer ID from the catalog and release the resources of the registered buffer. */ def removeBuffer(id: RapidsBufferId): Unit = singleton.removeBuffer(id) -} \ No newline at end of file +} diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsBufferStore.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsBufferStore.scala index 92847350df6..e9ac9c1235e 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsBufferStore.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsBufferStore.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,11 +70,17 @@ abstract class RapidsBufferStore( } } - def freeAll(): Unit = synchronized { - val values = buffers.values().toArray(new Array[RapidsBufferBase](0)) + def freeAll(): Unit = { + val values = synchronized { + val buffs = buffers.values().toArray(new Array[RapidsBufferBase](0)) + buffers.clear() + spillable.clear() + buffs + } + // We need to release the `RapidsBufferStore` lock to prevent a lock order inversion + // deadlock: (1) `RapidsBufferBase.free` calls (2) `RapidsBufferStore.remove` and + // (1) `RapidsBufferStore.freeAll` calls (2) `RapidsBufferBase.free`. values.foreach(_.free()) - buffers.clear() - spillable.clear() } def nextSpillableBuffer(): RapidsBufferBase = synchronized { diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index aec84a1f4a6..4042a466cbc 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -471,6 +471,16 @@ object RapidsConf { .booleanConf .createWithDefault(false) + val SHUFFLED_HASH_JOIN_OPTIMIZE_SHUFFLE = + conf("spark.rapids.sql.shuffledHashJoin.optimizeShuffle") + .doc("Enable or disable an optimization where shuffled build side batches are kept " + + "on the host while the first stream batch is loaded onto the GPU. The optimization " + + "increases off-heap host memory usage to avoid holding onto the GPU semaphore while " + + "waiting for stream side IO.") + .internal() + .booleanConf + .createWithDefault(true) + val STABLE_SORT = conf("spark.rapids.sql.stableSort.enabled") .doc("Enable or disable stable sorting. 
Apache Spark's sorting is typically a stable " + "sort, but sort stability cannot be guaranteed in distributed work loads because the " + @@ -1484,6 +1494,8 @@ class RapidsConf(conf: Map[String, String]) extends Logging { lazy val exportColumnarRdd: Boolean = get(EXPORT_COLUMNAR_RDD) + lazy val shuffledHashJoinOptimizeShuffle: Boolean = get(SHUFFLED_HASH_JOIN_OPTIMIZE_SHUFFLE) + lazy val stableSort: Boolean = get(STABLE_SORT) lazy val isIncompatEnabled: Boolean = get(INCOMPATIBLE_OPS) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsShuffleHeartbeatManager.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsShuffleHeartbeatManager.scala index ac3705f7004..09dfdfc1869 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsShuffleHeartbeatManager.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsShuffleHeartbeatManager.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,6 @@ import java.util.concurrent.{Executors, ScheduledExecutorService, TimeUnit} import scala.collection.mutable.ArrayBuffer -import com.google.common.util.concurrent.ThreadFactoryBuilder import org.apache.commons.lang3.mutable.MutableLong import org.apache.spark.SparkEnv diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RegexParser.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RegexParser.scala index 79db0286d8e..002c8b3f04b 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RegexParser.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RegexParser.scala @@ -404,6 +404,30 @@ class RegexParser(pattern: String) { } +object RegexParser { + private val regexpChars = Set('\u0000', '\\', '.', '^', '$', '\f') + + def isRegExpString(s: String): Boolean = { + + def isRegExpString(ast: RegexAST): Boolean = ast match { + case RegexChar(ch) => regexpChars.contains(ch) + case RegexEscaped(_) => true + case RegexSequence(parts) => parts.exists(isRegExpString) + case _ => true + } + + try { + val parser = new RegexParser(s) + val ast = parser.parse() + isRegExpString(ast) + } catch { + case _: RegexUnsupportedException => + // if we cannot parse it then assume that it might be valid regexp + true + } + } +} + /** * Transpile Java/Spark regular expression to a format that cuDF supports, or throw an exception * if this is not possible. diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/execution/datasources/rapids/OrcFiltersWrapper.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/execution/datasources/rapids/OrcFiltersWrapper.scala new file mode 100644 index 00000000000..65792c76c82 --- /dev/null +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/execution/datasources/rapids/OrcFiltersWrapper.scala @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.rapids + +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument + +import org.apache.spark.sql.execution.datasources.orc.OrcFilters +import org.apache.spark.sql.sources.Filter +import org.apache.spark.sql.types.StructType + +// Wrapper for Spark OrcFilters which is in private package +object OrcFiltersWrapper { + def createFilter(schema: StructType, filters: Seq[Filter]): Option[SearchArgument] = { + OrcFilters.createFilter(schema, filters) + } +} diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/OrcFilters.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/OrcFilters.scala deleted file mode 100644 index 2dd9973cafd..00000000000 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/OrcFilters.scala +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.rapids - -import java.time.{Instant, LocalDate} - -import org.apache.hadoop.hive.common.`type`.HiveDecimal -import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument} -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.Builder -import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.newBuilder -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable - -import org.apache.spark.SparkException -import org.apache.spark.sql.catalyst.util.DateTimeUtils.{instantToMicros, localDateToDays, toJavaDate, toJavaTimestamp} -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.types._ - -// This is derived from Apache Spark's OrcFilters code to avoid calling the -// Spark version. Spark's version can potentially create a search argument -// applier object that is incompatible with the orc:nohive jar that has been -// shaded as part of this project. - -/** - * Helper object for building ORC `SearchArgument`s, which are used for ORC predicate push-down. - * - * Due to limitation of ORC `SearchArgument` builder, we had to implement separate checking and - * conversion passes through the Filter to make sure we only convert predicates that are known - * to be convertible. - * - * An ORC `SearchArgument` must be built in one pass using a single builder. For example, you can't - * build `a = 1` and `b = 2` first, and then combine them into `a = 1 AND b = 2`. This is quite - * different from the cases in Spark SQL or Parquet, where complex filters can be easily built using - * existing simpler ones. - * - * The annoying part is that, `SearchArgument` builder methods like `startAnd()`, `startOr()`, and - * `startNot()` mutate internal state of the builder instance. This forces us to translate all - * convertible filters with a single builder instance. 
However, if we try to translate a filter - * before checking whether it can be converted or not, we may end up with a builder whose internal - * state is inconsistent in the case of an inconvertible filter. - * - * For example, to convert an `And` filter with builder `b`, we call `b.startAnd()` first, and then - * try to convert its children. Say we convert `left` child successfully, but find that `right` - * child is inconvertible. Alas, `b.startAnd()` call can't be rolled back, and `b` is inconsistent - * now. - * - * The workaround employed here is to trim the Spark filters before trying to convert them. This - * way, we can only do the actual conversion on the part of the Filter that is known to be - * convertible. - * - * P.S.: Hive seems to use `SearchArgument` together with `ExprNodeGenericFuncDesc` only. Usage of - * builder methods mentioned above can only be found in test code, where all tested filters are - * known to be convertible. - */ -object OrcFilters extends OrcFiltersBase { - - /** - * Create ORC filter as a SearchArgument instance. - */ - def createFilter(schema: StructType, filters: Seq[Filter]): Option[SearchArgument] = { - val dataTypeMap = OrcFilters.getSearchableTypeMap(schema, SQLConf.get.caseSensitiveAnalysis) - // Combines all convertible filters using `And` to produce a single conjunction - val conjunctionOptional = buildTree(convertibleFilters(dataTypeMap, filters)) - conjunctionOptional.map { conjunction => - // Then tries to build a single ORC `SearchArgument` for the conjunction predicate. - // The input predicate is fully convertible. There should not be any empty result in the - // following recursive method call `buildSearchArgument`. - buildSearchArgument(dataTypeMap, conjunction, newBuilder).build() - } - } - - def convertibleFilters( - dataTypeMap: Map[String, OrcPrimitiveField], - filters: Seq[Filter]): Seq[Filter] = { - import org.apache.spark.sql.sources._ - - def convertibleFiltersHelper( - filter: Filter, - canPartialPushDown: Boolean): Option[Filter] = filter match { - // At here, it is not safe to just convert one side and remove the other side - // if we do not understand what the parent filters are. - // - // Here is an example used to explain the reason. - // Let's say we have NOT(a = 2 AND b in ('1')) and we do not understand how to - // convert b in ('1'). If we only convert a = 2, we will end up with a filter - // NOT(a = 2), which will generate wrong results. - // - // Pushing one side of AND down is only safe to do at the top level or in the child - // AND before hitting NOT or OR conditions, and in this case, the unsupported predicate - // can be safely removed. - case And(left, right) => - val leftResultOptional = convertibleFiltersHelper(left, canPartialPushDown) - val rightResultOptional = convertibleFiltersHelper(right, canPartialPushDown) - (leftResultOptional, rightResultOptional) match { - case (Some(leftResult), Some(rightResult)) => Some(And(leftResult, rightResult)) - case (Some(leftResult), None) if canPartialPushDown => Some(leftResult) - case (None, Some(rightResult)) if canPartialPushDown => Some(rightResult) - case _ => None - } - - // The Or predicate is convertible when both of its children can be pushed down. - // That is to say, if one/both of the children can be partially pushed down, the Or - // predicate can be partially pushed down as well. - // - // Here is an example used to explain the reason. - // Let's say we have - // (a1 AND a2) OR (b1 AND b2), - // a1 and b1 is convertible, while a2 and b2 is not. 
- // The predicate can be converted as - // (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2) - // As per the logical in And predicate, we can push down (a1 OR b1). - case Or(left, right) => - for { - lhs <- convertibleFiltersHelper(left, canPartialPushDown) - rhs <- convertibleFiltersHelper(right, canPartialPushDown) - } yield Or(lhs, rhs) - case Not(pred) => - val childResultOptional = convertibleFiltersHelper(pred, canPartialPushDown = false) - childResultOptional.map(Not) - case other => - for (_ <- buildLeafSearchArgument(dataTypeMap, other, newBuilder())) yield other - } - filters.flatMap { filter => - convertibleFiltersHelper(filter, true) - } - } - - /** - * Get PredicateLeafType which is corresponding to the given DataType. - */ - def getPredicateLeafType(dataType: DataType): PredicateLeaf.Type = dataType match { - case BooleanType => PredicateLeaf.Type.BOOLEAN - case ByteType | ShortType | IntegerType | LongType => PredicateLeaf.Type.LONG - case FloatType | DoubleType => PredicateLeaf.Type.FLOAT - case StringType => PredicateLeaf.Type.STRING - case DateType => PredicateLeaf.Type.DATE - case TimestampType => PredicateLeaf.Type.TIMESTAMP - case _: DecimalType => PredicateLeaf.Type.DECIMAL - case _ => throw new UnsupportedOperationException(s"DataType: ${dataType.catalogString}") - } - - /** - * Cast literal values for filters. - * - * We need to cast to long because ORC raises exceptions - * at 'checkLiteralType' of SearchArgumentImpl.java. - */ - private def castLiteralValue(value: Any, dataType: DataType): Any = dataType match { - case ByteType | ShortType | IntegerType | LongType => - value.asInstanceOf[Number].longValue - case FloatType | DoubleType => - value.asInstanceOf[Number].doubleValue() - case _: DecimalType => - new HiveDecimalWritable(HiveDecimal.create(value.asInstanceOf[java.math.BigDecimal])) - case _: DateType if value.isInstanceOf[LocalDate] => - toJavaDate(localDateToDays(value.asInstanceOf[LocalDate])) - case _: TimestampType if value.isInstanceOf[Instant] => - toJavaTimestamp(instantToMicros(value.asInstanceOf[Instant])) - case _ => value - } - - /** - * Build a SearchArgument and return the builder so far. - * - * @param dataTypeMap a map from the attribute name to its data type. - * @param expression the input predicates, which should be fully convertible to SearchArgument. - * @param builder the input SearchArgument.Builder. - * @return the builder so far. - */ - private def buildSearchArgument( - dataTypeMap: Map[String, OrcPrimitiveField], - expression: Filter, - builder: Builder): Builder = { - import org.apache.spark.sql.sources._ - - expression match { - case And(left, right) => - val lhs = buildSearchArgument(dataTypeMap, left, builder.startAnd()) - val rhs = buildSearchArgument(dataTypeMap, right, lhs) - rhs.end() - - case Or(left, right) => - val lhs = buildSearchArgument(dataTypeMap, left, builder.startOr()) - val rhs = buildSearchArgument(dataTypeMap, right, lhs) - rhs.end() - - case Not(child) => - buildSearchArgument(dataTypeMap, child, builder.startNot()).end() - - case other => - buildLeafSearchArgument(dataTypeMap, other, builder).getOrElse { - throw new SparkException( - "The input filter of OrcFilters.buildSearchArgument should be fully convertible.") - } - } - } - - /** - * Build a SearchArgument for a leaf predicate and return the builder so far. - * - * @param dataTypeMap a map from the attribute name to its data type. - * @param expression the input filter predicates. - * @param builder the input SearchArgument.Builder. 
- * @return the builder so far. - */ - private def buildLeafSearchArgument( - dataTypeMap: Map[String, OrcPrimitiveField], - expression: Filter, - builder: Builder): Option[Builder] = { - def getType(attribute: String): PredicateLeaf.Type = - getPredicateLeafType(dataTypeMap(attribute).fieldType) - - import org.apache.spark.sql.sources._ - - // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()` - // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be - // wrapped by a "parent" predicate (`And`, `Or`, or `Not`). - expression match { - case EqualTo(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startAnd() - .equals(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case EqualNullSafe(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startAnd() - .nullSafeEquals(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case LessThan(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startAnd() - .lessThan(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case LessThanOrEqual(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startAnd() - .lessThanEquals(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case GreaterThan(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startNot() - .lessThanEquals(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case GreaterThanOrEqual(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startNot() - .lessThan(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case IsNull(name) if dataTypeMap.contains(name) => - Some(builder.startAnd() - .isNull(dataTypeMap(name).fieldName, getType(name)).end()) - - case IsNotNull(name) if dataTypeMap.contains(name) => - Some(builder.startNot() - .isNull(dataTypeMap(name).fieldName, getType(name)).end()) - - case In(name, values) if dataTypeMap.contains(name) => - val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(name).fieldType)) - Some(builder.startAnd().in(dataTypeMap(name).fieldName, getType(name), - castedValues.map(_.asInstanceOf[AnyRef]): _*).end()) - - case _ => None - } - } -} diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/OrcFiltersBase.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/OrcFiltersBase.scala deleted file mode 100644 index d4fb2f260d6..00000000000 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/OrcFiltersBase.scala +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.rapids - -import java.util.Locale - -import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.sources.{And, Filter} -import org.apache.spark.sql.types.{AtomicType, BinaryType, DataType, StructField, StructType} - -/** - * Methods that can be shared when upgrading the built-in Hive. - * - * Derived from Apache Spark to avoid depending upon it directly, - * since its API has changed between Spark versions. - */ -trait OrcFiltersBase { - - private[sql] def buildTree(filters: Seq[Filter]): Option[Filter] = { - filters match { - case Seq() => None - case Seq(filter) => Some(filter) - case Seq(filter1, filter2) => Some(And(filter1, filter2)) - case _ => // length > 2 - val (left, right) = filters.splitAt(filters.length / 2) - Some(And(buildTree(left).get, buildTree(right).get)) - } - } - - case class OrcPrimitiveField(fieldName: String, fieldType: DataType) - - /** - * This method returns a map which contains ORC field name and data type. Each key - * represents a column; `dots` are used as separators for nested columns. If any part - * of the names contains `dots`, it is quoted to avoid confusion. See - * `org.apache.spark.sql.connector.catalog.quoted` for implementation details. - * - * BinaryType, UserDefinedType, ArrayType and MapType are ignored. - */ - protected[sql] def getSearchableTypeMap( - schema: StructType, - caseSensitive: Boolean): Map[String, OrcPrimitiveField] = { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper - - def getPrimitiveFields( - fields: Seq[StructField], - parentFieldNames: Seq[String] = Seq.empty): Seq[(String, OrcPrimitiveField)] = { - fields.flatMap { f => - f.dataType match { - case st: StructType => - getPrimitiveFields(st.fields, parentFieldNames :+ f.name) - case BinaryType => None - case _: AtomicType => - val fieldName = (parentFieldNames :+ f.name).quoted - val orcField = OrcPrimitiveField(fieldName, f.dataType) - Some((fieldName, orcField)) - case _ => None - } - } - } - - val primitiveFields = getPrimitiveFields(schema.fields) - if (caseSensitive) { - primitiveFields.toMap - } else { - // Don't consider ambiguity here, i.e. more than one field are matched in case insensitive - // mode, just skip pushdown for these fields, they will trigger Exception when reading, - // See: SPARK-25175. 
- val dedupPrimitiveFields = primitiveFields - .groupBy(_._1.toLowerCase(Locale.ROOT)) - .filter(_._2.size == 1) - .mapValues(_.head._2) - CaseInsensitiveMap(dedupPrimitiveFields) - } - } -} diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala index 3bf981d2c63..76ab58f140b 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala @@ -171,9 +171,8 @@ case class GpuElementAt(left: Expression, right: Expression, failOnError: Boolea if (!exist.isValid || exist.getBoolean) { lhs.getBase.getMapValue(rhs.getBase) } else { - throw new NoSuchElementException( - s"Key: ${rhs.getValue.asInstanceOf[UTF8String].toString} " + - s"does not exist in one of the rows in the map column") + RapidsErrorUtils.throwInvalidElementAtIndexError( + rhs.getValue.asInstanceOf[UTF8String].toString, true) } } } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/complexTypeExtractors.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/complexTypeExtractors.scala index 8dd0635c988..14d152139eb 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/complexTypeExtractors.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/complexTypeExtractors.scala @@ -183,9 +183,8 @@ case class GpuGetMapValue(child: Expression, key: Expression, failOnError: Boole withResource(lhs.getBase.getMapKeyExistence(rhs.getBase)) { keyExistenceColumn => withResource(keyExistenceColumn.all) { exist => if (exist.isValid && !exist.getBoolean) { - throw new NoSuchElementException( - s"Key: ${rhs.getValue.asInstanceOf[UTF8String].toString} " + - s"does not exist in any one of the rows in the map column") + RapidsErrorUtils.throwInvalidElementAtIndexError( + rhs.getValue.asInstanceOf[UTF8String].toString) } } } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastExchangeExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastExchangeExec.scala index a3ba5724c66..f3e5737c0ca 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastExchangeExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastExchangeExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,7 +26,6 @@ import scala.util.control.NonFatal import ai.rapids.cudf.{HostMemoryBuffer, JCudfSerialization, NvtxColor, NvtxRange} import ai.rapids.cudf.JCudfSerialization.SerializedTableHeader -import com.google.common.util.concurrent.ThreadFactoryBuilder import com.nvidia.spark.rapids._ import com.nvidia.spark.rapids.GpuMetric._ import com.nvidia.spark.rapids.RapidsPluginImplicits._ diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/stringFunctions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/stringFunctions.scala index 5bcd9826028..827e1d75634 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/stringFunctions.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/stringFunctions.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.rapids import scala.collection.mutable.ArrayBuffer -import ai.rapids.cudf.{ColumnVector, ColumnView, DType, PadSide, Scalar, Table} +import ai.rapids.cudf.{BinaryOp, ColumnVector, ColumnView, DType, PadSide, Scalar, Table} import com.nvidia.spark.rapids._ import com.nvidia.spark.rapids.RapidsPluginImplicits._ import com.nvidia.spark.rapids.shims.v2.ShimExpression @@ -60,6 +60,32 @@ case class GpuLength(child: Expression) extends GpuUnaryExpression with ExpectsI input.getBase.getCharLengths() } +case class GpuBitLength(child: Expression) extends GpuUnaryExpression with ExpectsInputTypes { + + override def dataType: DataType = IntegerType + override def inputTypes: Seq[AbstractDataType] = Seq(StringType) + override def toString: String = s"bit_length($child)" + + override def doColumnar(input: GpuColumnVector): ColumnVector = { + withResource(input.getBase.getByteCount) { byteCnt => + // bit count = byte count * 8 + withResource(GpuScalar.from(3, IntegerType)) { factor => + byteCnt.binaryOp(BinaryOp.SHIFT_LEFT, factor, DType.INT32) + } + } + } +} + +case class GpuOctetLength(child: Expression) extends GpuUnaryExpression with ExpectsInputTypes { + + override def dataType: DataType = IntegerType + override def inputTypes: Seq[AbstractDataType] = Seq(StringType) + override def toString: String = s"octet_length($child)" + + override def doColumnar(input: GpuColumnVector): ColumnVector = + input.getBase.getByteCount +} + case class GpuStringLocate(substr: Expression, col: Expression, start: Expression) extends GpuTernaryExpression with ImplicitCastInputTypes { @@ -1296,7 +1322,7 @@ class GpuStringSplitMeta( } else { val str = regexp.get.value.asInstanceOf[UTF8String] if (str != null) { - if (!canRegexpBeTreatedLikeARegularString(str)) { + if (RegexParser.isRegExpString(str.toString)) { willNotWorkOnGpu("regular expressions are not supported yet") } if (str.numChars() == 0) { @@ -1320,7 +1346,7 @@ class GpuStringSplitMeta( case class GpuStringSplit(str: Expression, regex: Expression, limit: Expression) extends GpuTernaryExpression with ImplicitCastInputTypes { - override def dataType: DataType = ArrayType(StringType) + override def dataType: DataType = ArrayType(StringType, containsNull = false) override def inputTypes: Seq[DataType] = Seq(StringType, StringType, IntegerType) override def first: Expression = str override def second: Expression = regex diff --git a/tests/pom.xml b/tests/pom.xml index 356d2b18156..3329f0dee7c 100644 --- a/tests/pom.xml +++ b/tests/pom.xml @@ -54,12 +54,6 @@ ${cuda.version} provided - - com.nvidia - rapids-4-spark_${scala.binary.version} - ${project.version} - provided - com.nvidia diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala 
b/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala index 5db550ba083..78913447dae 100644 --- a/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala +++ b/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala @@ -177,34 +177,54 @@ class AdaptiveQueryExecSuite spark.sql("INSERT INTO TABLE t1 SELECT a, b FROM testData").collect() spark.sql("INSERT INTO TABLE t2 SELECT a, b FROM testData").collect() - val df = spark.sql( - "SELECT t1.a, t2.b " + + // This test checks that inputs to the SHJ are coalesced. We need to check both sides + // if we are not optimizing the build-side coalescing logic, and only the stream side + // if the optimization is enabled (default). + // See `RapidsConf.SHUFFLED_HASH_JOIN_OPTIMIZE_SHUFFLE` for more information. + Seq(true, false).foreach { shouldOptimizeHashJoinShuffle => + spark.conf.set( + RapidsConf.SHUFFLED_HASH_JOIN_OPTIMIZE_SHUFFLE.key, + shouldOptimizeHashJoinShuffle.toString) + val df = spark.sql( + "SELECT t1.a, t2.b " + "FROM t1 " + "JOIN t2 " + "ON t1.a = t2.a " + "WHERE t2.a = 5" // filter on partition key to force dynamic partition pruning - ) - df.collect() + ) + df.collect() - val isAdaptiveQuery = df.queryExecution.executedPlan.isInstanceOf[AdaptiveSparkPlanExec] - if (cmpSparkVersion(3, 2, 0) < 0) { - // assert that DPP did cause this to run as a non-AQE plan prior to Spark 3.2.0 - assert(!isAdaptiveQuery) - } else { - // In 3.2.0 AQE works with DPP - assert(isAdaptiveQuery) + val isAdaptiveQuery = df.queryExecution.executedPlan.isInstanceOf[AdaptiveSparkPlanExec] + if (cmpSparkVersion(3, 2, 0) < 0) { + // assert that DPP did cause this to run as a non-AQE plan prior to Spark 3.2.0 + assert(!isAdaptiveQuery) + } else { + // In 3.2.0 AQE works with DPP + assert(isAdaptiveQuery) + } + + val shj = TestUtils.findOperator(df.queryExecution.executedPlan, + _.isInstanceOf[GpuShuffledHashJoinExec]).get + .asInstanceOf[GpuShuffledHashJoinExec] + assert(shj.children.length == 2) + val childrenToCheck = if (shouldOptimizeHashJoinShuffle) { + // assert that the stream side of SHJ is coalesced + shj.buildSide match { + case GpuBuildLeft => Seq(shj.right) + case GpuBuildRight => Seq(shj.left) + } + } else { + // assert that both the build and stream side of SHJ are coalesced + // if we are not optimizing the build side shuffle + shj.children + } + assert(childrenToCheck.forall { + case GpuShuffleCoalesceExec(_, _) => true + case GpuCoalesceBatches(GpuShuffleCoalesceExec(_, _), _) => true + case _ => false + }) } - // assert that both inputs to the SHJ are coalesced - val shj = TestUtils.findOperator(df.queryExecution.executedPlan, - _.isInstanceOf[GpuShuffledHashJoinExec]).get - assert(shj.children.length == 2) - assert(shj.children.forall { - case GpuShuffleCoalesceExec(_, _) => true - case GpuCoalesceBatches(GpuShuffleCoalesceExec(_, _), _) => true - case _ => false - }) - }, conf) } diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/GpuCoalesceBatchesSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/GpuCoalesceBatchesSuite.scala index bf29a411989..d5332628c5b 100644 --- a/tests/src/test/scala/com/nvidia/spark/rapids/GpuCoalesceBatchesSuite.scala +++ b/tests/src/test/scala/com/nvidia/spark/rapids/GpuCoalesceBatchesSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -475,9 +475,9 @@ class GpuCoalesceBatchesSuite extends SparkQueryCompareTestSuite { val schema = new StructType().add("i", LongType) .add("j", DecimalType(ai.rapids.cudf.DType.DECIMAL64_MAX_PRECISION, 3)) val dummyMetric = WrappedGpuMetric(new SQLMetric("ignored")) - val coalesceIter = new GpuCoalesceIterator( + val coalesceIter = new GpuCompressionAwareCoalesceIterator( batchIter, - schema, + GpuColumnVector.extractTypes(schema), TargetSize(coalesceTargetBytes), maxCompressedBatchMemoryLimit, dummyMetric, @@ -559,9 +559,9 @@ class GpuCoalesceBatchesSuite extends SparkQueryCompareTestSuite { val schema = new StructType().add("i", LongType) .add("j", DecimalType(ai.rapids.cudf.DType.DECIMAL64_MAX_PRECISION, 3)) val dummyMetric = WrappedGpuMetric(new SQLMetric("ignored")) - val coalesceIter = new GpuCoalesceIterator( + val coalesceIter = new GpuCompressionAwareCoalesceIterator( batchIter, - schema, + GpuColumnVector.extractTypes(schema), TargetSize(coalesceTargetBytes), maxCompressedBatchMemoryLimit, dummyMetric, diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinExecSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinExecSuite.scala new file mode 100644 index 00000000000..ee11387c9ac --- /dev/null +++ b/tests/src/test/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinExecSuite.scala @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package com.nvidia.spark.rapids
+
+import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream}
+
+import ai.rapids.cudf.{ColumnVector, HostMemoryBuffer, JCudfSerialization, Table}
+import org.mockito.ArgumentMatchers._
+import org.mockito.Mockito._
+import org.scalatest.FunSuite
+import org.scalatest.mockito.MockitoSugar
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.catalyst.expressions.AttributeReference
+import org.apache.spark.sql.types.IntegerType
+import org.apache.spark.sql.vectorized.ColumnarBatch
+
+class GpuShuffledHashJoinExecSuite extends FunSuite with Arm with MockitoSugar {
+ val metricMap = mock[Map[String, GpuMetric]]
+ when(metricMap(any())).thenReturn(NoopMetric)
+
+ test("fallback with empty build iterator") {
+ TestUtils.withGpuSparkSession(new SparkConf()) { _ =>
+ val mockBuildIter = mock[Iterator[ColumnarBatch]]
+ when(mockBuildIter.hasNext).thenReturn(false)
+ val mockStreamIter = mock[Iterator[ColumnarBatch]]
+ val (builtBatch, bStreamIter) = GpuShuffledHashJoinExec.getBuiltBatchAndStreamIter(
+ 0,
+ Seq.empty,
+ mockBuildIter,
+ mockStreamIter,
+ mock[SpillCallback],
+ metricMap)
+ withResource(builtBatch) { _ =>
+ // we get an empty batch with no columns or rows
+ assertResult(builtBatch.numCols())(0)
+ assertResult(builtBatch.numRows())(0)
+ // 2 invocations, once in the `getBuiltBatchAndStreamIter`
+ // method, and a second one in `getSingleBatchWithVerification`
+ verify(mockBuildIter, times(2)).hasNext
+ verify(mockBuildIter, times(0)).next
+ verify(mockStreamIter, times(0)).hasNext
+ }
+ }
+ }
+
+ test("fallback with 0 column build batches") {
+ TestUtils.withGpuSparkSession(new SparkConf()) { _ =>
+ withResource(GpuColumnVector.emptyBatchFromTypes(Array.empty)) {
+ emptyBatch =>
+ val buildIter = mock[Iterator[ColumnarBatch]]
+ when(buildIter.hasNext).thenReturn(true, false)
+ val buildBufferedIter = mock[BufferedIterator[ColumnarBatch]]
+ when(buildBufferedIter.hasNext).thenReturn(true, false)
+ when(buildBufferedIter.head).thenReturn(emptyBatch)
+ when(buildBufferedIter.next).thenReturn(emptyBatch)
+ when(buildIter.buffered).thenReturn(buildBufferedIter)
+ val mockStreamIter = mock[Iterator[ColumnarBatch]]
+ val (builtBatch, bStreamIter) = GpuShuffledHashJoinExec.getBuiltBatchAndStreamIter(
+ 0,
+ Seq.empty,
+ buildIter,
+ mockStreamIter,
+ mock[SpillCallback],
+ metricMap)
+ withResource(builtBatch) { _ =>
+ assertResult(builtBatch.numCols())(0)
+ assertResult(builtBatch.numRows())(0)
+ // 1 invocation in the `getBuiltBatchAndStreamIter`
+ // after which a buffered iterator is obtained and used for the fallback case
+ verify(buildIter, times(1)).hasNext
+ verify(buildIter, times(1)).buffered
+ // we ask the buffered iterator for `head` to inspect the number of columns
+ verify(buildBufferedIter, times(1)).head
+ // the buffered iterator is passed to `getSingleBatchWithVerification`,
+ // and that code calls hasNext twice
+ verify(buildBufferedIter, times(2)).hasNext
+ // and calls next to get that batch we buffered
+ verify(buildBufferedIter, times(1)).next
+ verify(mockStreamIter, times(0)).hasNext
+ }
+ }
+ }
+ }
+
+ test("fallback with a non-SerializedTableColumn 1 col and 0 rows") {
+ TestUtils.withGpuSparkSession(new SparkConf()) { _ =>
+ val emptyBatch = GpuColumnVector.emptyBatchFromTypes(Seq(IntegerType).toArray)
+ val buildIter = Seq(emptyBatch).iterator
+ val mockStreamIter = mock[Iterator[ColumnarBatch]]
+ val (builtBatch, bStreamIter) = GpuShuffledHashJoinExec.getBuiltBatchAndStreamIter(
+ 0,
+ Seq.empty,
+ buildIter,
+ mockStreamIter,
+ mock[SpillCallback],
+ metricMap)
+ withResource(builtBatch) { _ =>
+ assertResult(builtBatch.numCols())(1)
+ assertResult(builtBatch.numRows())(0)
+ // 2 invocations, once in the `getBuiltBatchAndStreamIter`
+ // method, and one in `getSingleBatchWithVerification`
+ verify(mockStreamIter, times(0)).hasNext
+ // the buffered iterator drained the build iterator
+ assertResult(buildIter.hasNext)(false)
+ }
+ }
+ }
+
+ test("fallback with a non-SerializedTableColumn") {
+ TestUtils.withGpuSparkSession(new SparkConf()) { _ =>
+ closeOnExcept(ColumnVector.fromInts(1, 2, 3, 4, 5)) { cudfCol =>
+ val cv = GpuColumnVector.from(cudfCol, IntegerType)
+ val batch = new ColumnarBatch(Seq(cv).toArray, 5)
+ val buildIter = Seq(batch).iterator
+ val mockStreamIter = mock[Iterator[ColumnarBatch]]
+ val (builtBatch, bStreamIter) = GpuShuffledHashJoinExec.getBuiltBatchAndStreamIter(
+ 0,
+ Seq.empty,
+ buildIter,
+ mockStreamIter,
+ mock[SpillCallback],
+ metricMap)
+ withResource(builtBatch) { _ =>
+ assertResult(builtBatch.numCols())(1)
+ assertResult(builtBatch.numRows())(5)
+ // 2 invocations, once in the `getBuiltBatchAndStreamIter`
+ // method, and one in `getSingleBatchWithVerification`
+ verify(mockStreamIter, times(0)).hasNext
+ // the buffered iterator drained the build iterator
+ assertResult(buildIter.hasNext)(false)
+ }
+ }
+ }
+ }
+
+ def getSerializedBatch(tbl: Table): ColumnarBatch = {
+ val outStream = new ByteArrayOutputStream()
+ JCudfSerialization.writeToStream(tbl, outStream, 0, tbl.getRowCount)
+ val dIn = new DataInputStream(new ByteArrayInputStream(outStream.toByteArray))
+ val header = new JCudfSerialization.SerializedTableHeader(dIn)
+ closeOnExcept(HostMemoryBuffer.allocate(header.getDataLen, false)) { hostBuffer =>
+ JCudfSerialization.readTableIntoBuffer(dIn, header, hostBuffer)
+ SerializedTableColumn.from(header, hostBuffer)
+ }
+ }
+
+ def getSerializedBatch(numRows: Int): ColumnarBatch = {
+ val outStream = new ByteArrayOutputStream()
+ JCudfSerialization.writeRowsToStream(outStream, numRows)
+ val dIn = new DataInputStream(new ByteArrayInputStream(outStream.toByteArray))
+ val header = new JCudfSerialization.SerializedTableHeader(dIn)
+ closeOnExcept(HostMemoryBuffer.allocate(header.getDataLen, false)) { hostBuffer =>
+ JCudfSerialization.readTableIntoBuffer(dIn, header, hostBuffer)
+ SerializedTableColumn.from(header, hostBuffer)
+ }
+ }
+
+ test("test a 0-column SerializedTableColumn") {
+ TestUtils.withGpuSparkSession(new SparkConf()) { _ =>
+ val serializedBatch = getSerializedBatch(5)
+ val mockStreamIter = mock[Iterator[ColumnarBatch]]
+ val mockBufferedStreamIterator = mock[BufferedIterator[ColumnarBatch]]
+ when(mockStreamIter.hasNext).thenReturn(true)
+ when(mockStreamIter.buffered).thenReturn(mockBufferedStreamIterator)
+ when(mockBufferedStreamIterator.hasNext).thenReturn(true)
+ closeOnExcept(serializedBatch) { _ =>
+ val buildIter = Seq(serializedBatch).iterator
+ val attrs = AttributeReference("a", IntegerType, false)() :: Nil
+ val (builtBatch, bStreamIter) = GpuShuffledHashJoinExec.getBuiltBatchAndStreamIter(
+ 1024,
+ attrs,
+ buildIter,
+ mockStreamIter,
+ mock[SpillCallback],
+ metricMap)
+ withResource(builtBatch) { _ =>
+ verify(mockBufferedStreamIterator, times(1)).hasNext
+ assertResult(builtBatch.numCols())(0)
+ assertResult(builtBatch.numRows())(5)
+ // the buffered iterator drained the build iterator
+ assertResult(buildIter.hasNext)(false)
+ }
+ }
+ }
+ }
+
+ test("test a SerializedTableColumn") {
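+ // The build side here is a single serialized batch under the target size, so the
+ // optimized path is expected to peek at the stream side (acquiring the semaphore
+ // there) before the build batch is put on the GPU.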
+ TestUtils.withGpuSparkSession(new SparkConf()) { _ =>
+ closeOnExcept(ColumnVector.fromInts(1, 2, 3, 4, 5)) { cudfCol =>
+ val cv = GpuColumnVector.from(cudfCol, IntegerType)
+ val batch = new ColumnarBatch(Seq(cv).toArray, 5)
+ withResource(GpuColumnVector.from(batch)) { tbl =>
+ val serializedBatch = getSerializedBatch(tbl)
+ val mockStreamIter = mock[Iterator[ColumnarBatch]]
+ val mockBufferedStreamIterator = mock[BufferedIterator[ColumnarBatch]]
+ when(mockStreamIter.hasNext).thenReturn(true)
+ when(mockStreamIter.buffered).thenReturn(mockBufferedStreamIterator)
+ when(mockBufferedStreamIterator.hasNext).thenReturn(true)
+ closeOnExcept(serializedBatch) { _ =>
+ val buildIter = Seq(serializedBatch).iterator
+ val attrs = AttributeReference("a", IntegerType, false)() :: Nil
+ val (builtBatch, bStreamIter) = GpuShuffledHashJoinExec.getBuiltBatchAndStreamIter(
+ 1024,
+ attrs,
+ buildIter,
+ mockStreamIter,
+ mock[SpillCallback],
+ metricMap)
+ withResource(builtBatch) { _ =>
+ verify(mockBufferedStreamIterator, times(1)).hasNext
+ assertResult(builtBatch.numCols())(1)
+ assertResult(builtBatch.numRows())(5)
+ // the buffered iterator drained the build iterator
+ assertResult(buildIter.hasNext)(false)
+ }
+ }
+ }
+ }
+ }
+
+ test("test two batches, going over the limit") {
+ TestUtils.withGpuSparkSession(new SparkConf()) { _ =>
+ closeOnExcept(ColumnVector.fromInts(1, 2, 3, 4, 5)) { cudfCol =>
+ val cv = GpuColumnVector.from(cudfCol, IntegerType)
+ val batch = new ColumnarBatch(Seq(cv).toArray, 5)
+ withResource(GpuColumnVector.from(batch)) { tbl =>
+ val serializedBatch = getSerializedBatch(tbl)
+ val serializedBatch2 = getSerializedBatch(tbl)
+ val mockStreamIter = mock[Iterator[ColumnarBatch]]
+ val mockBufferedStreamIterator = mock[BufferedIterator[ColumnarBatch]]
+ when(mockStreamIter.hasNext).thenReturn(true)
+ when(mockStreamIter.buffered).thenReturn(mockBufferedStreamIterator)
+ when(mockBufferedStreamIterator.hasNext).thenReturn(true)
+ closeOnExcept(serializedBatch) { _ =>
+ closeOnExcept(serializedBatch2) { _ =>
+ val buildIter = Seq(serializedBatch, serializedBatch2).iterator
+ val attrs = AttributeReference("a", IntegerType, false)() :: Nil
+ val (builtBatch, bStreamIter) = GpuShuffledHashJoinExec.getBuiltBatchAndStreamIter(
+ 1,
+ attrs,
+ buildIter,
+ mockStreamIter,
+ mock[SpillCallback],
+ metricMap)
+ withResource(builtBatch) { _ =>
+ verify(mockBufferedStreamIterator, times(0)).hasNext
+ assertResult(builtBatch.numCols())(1)
+ assertResult(builtBatch.numRows())(10)
+ // the buffered iterator drained the build iterator
+ assertResult(buildIter.hasNext)(false)
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ test("test two batches, staying within the limit") {
+ TestUtils.withGpuSparkSession(new SparkConf()) { _ =>
+ closeOnExcept(ColumnVector.fromInts(1, 2, 3, 4, 5)) { cudfCol =>
+ val cv = GpuColumnVector.from(cudfCol, IntegerType)
+ val batch = new ColumnarBatch(Seq(cv).toArray, 5)
+ withResource(GpuColumnVector.from(batch)) { tbl =>
+ val serializedBatch = getSerializedBatch(tbl)
+ val serializedBatch2 = getSerializedBatch(tbl)
+ val mockStreamIter = mock[Iterator[ColumnarBatch]]
+ val mockBufferedStreamIterator = mock[BufferedIterator[ColumnarBatch]]
+ when(mockStreamIter.hasNext).thenReturn(true)
+ when(mockStreamIter.buffered).thenReturn(mockBufferedStreamIterator)
+ when(mockBufferedStreamIterator.hasNext).thenReturn(true)
+ closeOnExcept(serializedBatch) { _ =>
+ closeOnExcept(serializedBatch2) { _ =>
+ val buildIter = Seq(serializedBatch, serializedBatch2).iterator
+ val
+              val attrs = AttributeReference("a", IntegerType, false)() :: Nil
+              val (builtBatch, bStreamIter) = GpuShuffledHashJoinExec.getBuiltBatchAndStreamIter(
+                1024,
+                attrs,
+                buildIter,
+                mockStreamIter,
+                mock[SpillCallback],
+                metricMap)
+              withResource(builtBatch) { _ =>
+                verify(mockBufferedStreamIterator, times(1)).hasNext
+                assertResult(builtBatch.numCols())(1)
+                assertResult(builtBatch.numRows())(10)
+                // the buffered iterator drained the build iterator
+                assertResult(buildIter.hasNext)(false)
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/OrcScanSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/OrcScanSuite.scala
index 997409412fb..a94affbf08d 100644
--- a/tests/src/test/scala/com/nvidia/spark/rapids/OrcScanSuite.scala
+++ b/tests/src/test/scala/com/nvidia/spark/rapids/OrcScanSuite.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -106,6 +106,9 @@ class OrcScanSuite extends SparkQueryCompareTestSuite {
    * is actually 1582-09-23 in proleptic Gregorian calendar.
    */
   test("test hybrid Julian Gregorian calendar vs proleptic Gregorian calendar") {
+    // After Spark 3.1.1, Orc fails to prune when converting the hybrid calendar to the
+    // proleptic calendar. Orc bug: https://issues.apache.org/jira/browse/ORC-1083
+    assumePriorToSpark311
     withCpuSparkSession(spark => {
       val df = frameFromOrcWithSchema("hybrid-Julian-calendar.orc",
diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionParserSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionParserSuite.scala
index 2d31835cf55..389fe7800af 100644
--- a/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionParserSuite.scala
+++ b/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionParserSuite.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,6 +21,24 @@ import org.scalatest.FunSuite
 
 class RegularExpressionParserSuite extends FunSuite {
 
+  test("detect regexp strings") {
+    // Based on https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html
+    val strings: Seq[String] = Seq("\\", "\u0000", "\\x00", "\\.",
+      "\f", "\\a", "\\e", "\\cx", "[abc]", "^", "[a-z&&[def]]", ".", "*", "\\d", "\\D",
+      "\\h", "\\H", "\\s", "\\S", "\\v", "\\V", "\\w", "\\w", "\\p", "$", "\\b", "\\B",
+      "\\A", "\\G", "\\Z", "\\z", "\\R", "?", "|", "(abc)", "a{1,}", "\\k", "\\Q", "\\E")
+    for (string <- strings) {
+      assert(RegexParser.isRegExpString(string))
+    }
+  }
+
+  test("detect non-regexp strings") {
+    val strings = Seq("A", ",", "\t", ":", "")
+    for (string <- strings) {
+      assert(!RegexParser.isRegExpString(string))
+    }
+  }
+
   test("empty pattern") {
     assert(parse("") === RegexSequence(ListBuffer()))
   }
diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala
index 1f84b04ad77..b8357c9db15 100644
--- a/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala
+++ b/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -1835,6 +1835,9 @@ trait SparkQueryCompareTestSuite extends FunSuite with Arm {
   def assumeSpark320orLater: Assertion =
     assume(VersionUtils.isSpark320OrLater, "Spark version not 3.2.0+")
 
+  def assumePriorToSpark311: Assertion =
+    assume(!VersionUtils.isSpark311OrLater, "Spark version not before 3.1.1")
+
   def cmpSparkVersion(major: Int, minor: Int, bugfix: Int): Int = {
     val sparkShimVersion = ShimLoader.getSparkShims.getSparkShimVersion
     val (sparkMajor, sparkMinor, sparkBugfix) = sparkShimVersion match {
diff --git a/tools/pom.xml b/tools/pom.xml
index d8fffb17ccb..25fe91d7f5f 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -40,6 +40,11 @@
+    <dependency>
+      <groupId>com.nvidia</groupId>
+      <artifactId>rapids-4-spark-common_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
     <dependency>
       <groupId>org.scala-lang</groupId>
       <artifactId>scala-library</artifactId>
@@ -100,18 +105,14 @@
                   <include>org.rogach:scallop_${scala.binary.version}</include>
+                  <include>com.nvidia:rapids-4-spark-common_${scala.binary.version}</include>
                 </includes>
               </artifactSet>
               <filters>
-                <filter>
-                  <artifact>org.rogach:scallop_${scala.binary.version}:*</artifact>
-                  <excludes>
-                    <exclude>META-INF/*.MF</exclude>
-                  </excludes>
-                </filter>
                 <filter>
                   <artifact>*:*</artifact>
                   <excludes>
+                    <exclude>META-INF/*.MF</exclude>
                     <exclude>META-INF/*.SF</exclude>
                     <exclude>META-INF/*.DSA</exclude>
                     <exclude>META-INF/*.RSA</exclude>
diff --git a/tools/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala b/tools/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
index c8ef1f7cd9d..137e35a8f7b 100644
--- a/tools/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
+++ b/tools/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable.{ArrayBuffer, HashMap}
 import scala.util.control.NonFatal
 
-import com.google.common.util.concurrent.ThreadFactoryBuilder
+import com.nvidia.spark.rapids.ThreadFactoryBuilder
 import com.nvidia.spark.rapids.tool.{EventLogInfo, EventLogPathProcessor}
 import org.apache.hadoop.conf.Configuration
diff --git a/tools/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala b/tools/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala
index 33579a98b3d..a895d28fe69 100644
--- a/tools/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala
+++ b/tools/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@ import java.util.concurrent.{ConcurrentLinkedQueue, Executors, ThreadPoolExecuto
 
 import scala.collection.JavaConverters._
 
-import com.google.common.util.concurrent.ThreadFactoryBuilder
+import com.nvidia.spark.rapids.ThreadFactoryBuilder
 import com.nvidia.spark.rapids.tool.EventLogInfo
 import org.apache.hadoop.conf.Configuration
diff --git a/tools/src/main/scala/org/apache/spark/sql/rapids/tool/AppFilterImpl.scala b/tools/src/main/scala/org/apache/spark/sql/rapids/tool/AppFilterImpl.scala
index 2b95e7639cd..5ebae2a075b 100644
--- a/tools/src/main/scala/org/apache/spark/sql/rapids/tool/AppFilterImpl.scala
+++ b/tools/src/main/scala/org/apache/spark/sql/rapids/tool/AppFilterImpl.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@ import java.util.regex.PatternSyntaxException
 
 import scala.collection.JavaConverters._
 
-import com.google.common.util.concurrent.ThreadFactoryBuilder
+import com.nvidia.spark.rapids.ThreadFactoryBuilder
 import com.nvidia.spark.rapids.tool.EventLogInfo
 import com.nvidia.spark.rapids.tool.qualification.QualificationArgs
 import org.apache.hadoop.conf.Configuration
diff --git a/udf-examples/pom.xml b/udf-examples/pom.xml
index e5bc938f9f7..f44cce94e81 100644
--- a/udf-examples/pom.xml
+++ b/udf-examples/pom.xml
@@ -142,6 +142,12 @@
       <version>${spark.version}</version>
       <scope>provided</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-storage-api</artifactId>
+      <version>${spark.version}</version>
+      <scope>provided</scope>
+    </dependency>