-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[HUDI-7559] [1/n] Fix RecordLevelIndexSupport::filterQueryWithRecordK…
…ey (#10947) RecordLevelIndexSupport::filterQueryWithRecordKey() throws an NPE if the EqualTo query predicate is not of the form `AttributeReference = Literal`. This is because RecordLevelIndexSupport::getAttributeLiteralTuple() returns null in such cases, and that return value is then dereferenced unconditionally. This bug prevented the functional index from being used even when the query predicate contained the Spark functions on which the functional index was built. As a result, column-stats based functional indexes were not pruning files. This PR makes the following minor changes. 1. Move some methods in RecordLevelIndexSupport into an object to make them static (to aid unit testing). 2. Fix filterQueryWithRecordKey() by checking for null return values from the call to getAttributeLiteralTuple(). 3. Add unit tests in TestRecordLevelIndexSupport.scala. Co-authored-by: Vinaykumar Bhat <vinay@onehouse.ai>
- Loading branch information
Showing
2 changed files
with
145 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
88 changes: 88 additions & 0 deletions
88
...source/hudi-spark-common/src/test/scala/org/apache/hudi/TestRecordLevelIndexSupport.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.hudi | ||
|
||
import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField | ||
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Expression, FromUnixTime, GreaterThan, In, Literal, Not} | ||
import org.apache.spark.sql.types.StringType | ||
import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} | ||
import org.junit.jupiter.api.Test | ||
|
||
import java.util.TimeZone | ||
|
||
/**
 * Unit tests for `RecordLevelIndexSupport.filterQueryWithRecordKey`.
 *
 * Each case builds a Catalyst filter expression, passes it together with an optional
 * record key field name, and checks whether the call yields a result or an empty Option.
 */
class TestRecordLevelIndexSupport {

  @Test
  def testFilterQueryWithRecordKey(): Unit = {
    val recordKeyField = Option(HoodieMetadataField.RECORD_KEY_METADATA_FIELD.getFieldName)
    val nonRecordKeyField = Option("blah")

    // A `def` so that every use builds a fresh AttributeReference, as the individual cases require.
    def rowKeyAttribute: AttributeReference = AttributeReference("_row_key", StringType, nullable = true)()

    val fmt = "yyyy-MM-dd HH:mm:ss"
    val fromUnixTime = FromUnixTime(Literal(0L), Literal(fmt), Some(TimeZone.getDefault.getID))
    val timestampLiteral = Literal("2020-01-01 00:10:20")
    val keyLiterals = List(Literal("xyz"), Literal("abc"))

    // Asserts that the given filter / record key combination yields an empty result.
    // The label identifies the failing case, since all cases share this one test method.
    def assertRejected(label: String, filter: Expression, recordKeyOpt: Option[String]): Unit = {
      val result = RecordLevelIndexSupport.filterQueryWithRecordKey(filter, recordKeyOpt)
      assertTrue(result.isEmpty, s"$label: expected an empty result")
    }

    // Case 1: EqualTo with a non-AttributeReference expression on the left and a Literal on the
    // right should return empty result
    assertRejected("Case 1", EqualTo(fromUnixTime, timestampLiteral), None)

    // Case 2: EqualTo with a Literal on the left and a non-AttributeReference expression on the
    // right should return empty result
    assertRejected("Case 2", EqualTo(timestampLiteral, fromUnixTime), None)

    // Case 3: EqualTo on a simple AttributeReference and a non-Literal should return empty result
    assertRejected("Case 3", EqualTo(rowKeyAttribute, fromUnixTime), None)

    // Case 4: EqualTo on a simple AttributeReference and a Literal should return non-empty result
    val equalToOnRowKey: Expression = EqualTo(rowKeyAttribute, Literal("row1"))
    val equalToResult = RecordLevelIndexSupport.filterQueryWithRecordKey(equalToOnRowKey, recordKeyField)
    assertTrue(equalToResult.isDefined, "Case 4: expected a non-empty result")
    // JUnit's contract is assertEquals(expected, actual); the expected tuple is spelled out
    // explicitly instead of relying on argument auto-tupling.
    assertEquals(Option((equalToOnRowKey, List("row1"))), equalToResult)

    // Case 5: the same EqualTo filter with a record key option naming another field should
    // return empty result
    assertRejected("Case 5", equalToOnRowKey, nonRecordKeyField)

    // Case 6: In filter with a record key option naming another field should return empty result
    assertRejected("Case 6", In(rowKeyAttribute, keyLiterals), nonRecordKeyField)

    // Case 7: In filter on record key should return non-empty result
    val inResult = RecordLevelIndexSupport.filterQueryWithRecordKey(In(rowKeyAttribute, keyLiterals), recordKeyField)
    assertTrue(inResult.isDefined, "Case 7: expected a non-empty result")

    // Case 8: In filter on a simple AttributeReference (on record key) with a non-Literal in its
    // value list should return empty result
    assertRejected("Case 8", In(rowKeyAttribute, List(fromUnixTime)), recordKeyField)

    // Case 9: Anything other than EqualTo and In predicates is not supported. Hence it returns
    // empty result
    assertRejected("Case 9 (Not(In) over literals)", Not(In(rowKeyAttribute, keyLiterals)), recordKeyField)
    assertRejected("Case 9 (Not(In) over non-literal)", Not(In(rowKeyAttribute, List(fromUnixTime))), recordKeyField)
    assertRejected("Case 9 (GreaterThan)", GreaterThan(rowKeyAttribute, Literal("row1")), recordKeyField)
  }
}