From 469637d75bab64782455d13b3cfe4452cee748f8 Mon Sep 17 00:00:00 2001 From: Andrii Rosa Date: Tue, 10 Dec 2019 16:31:36 -0500 Subject: [PATCH] Set boolean value to 0 if the position is null MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting the actual value to a random 0 or 1 is rather not intuitive and error prone. We may expect the boolean (or integer) value to be set to 0 if the position is null in some places across the code Benchmarks Before: Benchmark (typeSignature) (withNulls) Mode Cnt Score Error Units BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni boolean PARTIAL avgt 20 0.022 ± 0.002 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni tinyint PARTIAL avgt 20 0.022 ± 0.003 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni smallint PARTIAL avgt 20 0.075 ± 0.006 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni integer PARTIAL avgt 20 0.107 ± 0.010 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni bigint PARTIAL avgt 20 0.167 ± 0.010 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni boolean PARTIAL avgt 20 0.023 ± 0.003 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni tinyint PARTIAL avgt 20 0.022 ± 0.004 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni smallint PARTIAL avgt 20 0.091 ± 0.006 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni integer PARTIAL avgt 20 0.122 ± 0.008 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni bigint PARTIAL avgt 20 0.199 ± 0.011 s/op After: Benchmark (typeSignature) (withNulls) Mode Cnt Score Error Units BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni boolean PARTIAL avgt 20 0.022 ± 0.004 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni tinyint PARTIAL avgt 20 0.021 ± 0.004 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni smallint PARTIAL avgt 20 0.073 ± 0.006 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni integer PARTIAL avgt 20 0.103 ± 0.011 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni bigint PARTIAL avgt 20 0.172 ± 0.010 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni boolean PARTIAL avgt 20 0.022 ± 0.003 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni tinyint PARTIAL avgt 20 0.021 ± 0.004 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni smallint PARTIAL avgt 20 0.093 ± 0.010 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni integer PARTIAL avgt 20 0.123 ± 0.011 s/op BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni bigint PARTIAL avgt 20 0.196 ± 0.011 s/op --- .../presto/orc/reader/ReaderUtils.java | 8 +++--- .../com/facebook/presto/orc/OrcTester.java | 27 ++++++++++++++++++- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/reader/ReaderUtils.java b/presto-orc/src/main/java/com/facebook/presto/orc/reader/ReaderUtils.java index d68806f9b7e4..720023399de5 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/reader/ReaderUtils.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/reader/ReaderUtils.java @@ -51,8 +51,8 @@ public static byte[] unpackByteNulls(byte[] values, boolean[] isNull) int position = 0; for (int i = 0; i < isNull.length; i++) { - result[i] = values[position]; if (!isNull[i]) { + result[i] = values[position]; position++; } } @@ -65,8 +65,8 @@ public static short[] unpackShortNulls(short[] values, boolean[] isNull) int position = 0; for (int i = 0; i < isNull.length; i++) { - result[i] = values[position]; if (!isNull[i]) { + result[i] = values[position]; position++; } } @@ -79,8 +79,8 @@ public static int[] unpackIntNulls(int[] values, boolean[] isNull) int position = 0; for (int i = 0; i < isNull.length; i++) { - result[i] = values[position]; if (!isNull[i]) { + result[i] = values[position]; position++; } } @@ -93,8 +93,8 @@ public static long[] unpackLongNulls(long[] values, boolean[] isNull) int position = 0; for (int i = 0; i < isNull.length; i++) { - result[i] = values[position]; if (!isNull[i]) { + result[i] = values[position]; position++; } } diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/OrcTester.java b/presto-orc/src/test/java/com/facebook/presto/orc/OrcTester.java index 34b1d994a6c2..34ded96c18cb 100644 --- a/presto-orc/src/test/java/com/facebook/presto/orc/OrcTester.java +++ b/presto-orc/src/test/java/com/facebook/presto/orc/OrcTester.java @@ -185,6 +185,7 @@ import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector; import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; @@ -799,8 +800,8 @@ public static void assertFileContentsPresto( for (int i = 0; i < types.size(); i++) { Type type = types.get(i); Block block = page.getBlock(i); - assertEquals(block.getPositionCount(), positionCount); + checkNullValues(type, block); List data = new ArrayList<>(positionCount); for (int position = 0; position < positionCount; position++) { @@ -893,6 +894,7 @@ else if (skipFirstBatch && isFirst) { Type type = types.get(i); Block block = recordReader.readBlock(i); assertEquals(block.getPositionCount(), batchSize); + checkNullValues(type, block); List data = new ArrayList<>(block.getPositionCount()); for (int position = 0; position < block.getPositionCount(); position++) { @@ -1899,6 +1901,29 @@ else if (type.getTypeSignature().getBase().equals(StandardTypes.ROW)) { throw new IllegalArgumentException("unsupported type: " + type); } + private static void checkNullValues(Type type, Block block) + { + if (!block.mayHaveNull()) { + return; + } + for (int position = 0; position < block.getPositionCount(); position++) { + if (block.isNull(position)) { + if (type.equals(TINYINT) || type.equals(SMALLINT) || type.equals(INTEGER) || type.equals(BIGINT) || type.equals(REAL) || type.equals(DATE) || type.equals(TIMESTAMP)) { + assertEquals(type.getLong(block, position), 0); + } + if (type.equals(BOOLEAN)) { + assertFalse(type.getBoolean(block, position)); + } + if (type.equals(DOUBLE)) { + assertEquals(type.getDouble(block, position), 0.0); + } + if (type instanceof VarcharType || type instanceof CharType || type.equals(VARBINARY)) { + assertEquals(type.getSlice(block, position).length(), 0); + } + } + } + } + private static void setDwrfLowMemoryFlag(RecordWriter recordWriter) { Object writer = getFieldValue(recordWriter, "writer");