Skip to content

Commit

Permalink
Set boolean value to 0 if the position is null
Browse files Browse the repository at this point in the history
Setting the actual value to a random 0 or 1 is rather not
intuitive and error prone. We may expect the boolean (or integer) value
to be set to 0 if the position is null in some places across the code

Benchmarks

Before:

Benchmark                                                 (typeSignature)  (withNulls)  Mode  Cnt  Score   Error  Units
BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni             boolean      PARTIAL  avgt   20  0.022 ± 0.002   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni             tinyint      PARTIAL  avgt   20  0.022 ± 0.003   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni            smallint      PARTIAL  avgt   20  0.075 ± 0.006   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni             integer      PARTIAL  avgt   20  0.107 ± 0.010   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni              bigint      PARTIAL  avgt   20  0.167 ± 0.010   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni          boolean      PARTIAL  avgt   20  0.023 ± 0.003   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni          tinyint      PARTIAL  avgt   20  0.022 ± 0.004   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni         smallint      PARTIAL  avgt   20  0.091 ± 0.006   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni          integer      PARTIAL  avgt   20  0.122 ± 0.008   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni           bigint      PARTIAL  avgt   20  0.199 ± 0.011   s/op

After:

Benchmark                                                 (typeSignature)  (withNulls)  Mode  Cnt  Score   Error  Units
BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni             boolean      PARTIAL  avgt   20  0.022 ± 0.004   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni             tinyint      PARTIAL  avgt   20  0.021 ± 0.004   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni            smallint      PARTIAL  avgt   20  0.073 ± 0.006   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni             integer      PARTIAL  avgt   20  0.103 ± 0.011   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithJni              bigint      PARTIAL  avgt   20  0.172 ± 0.010   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni          boolean      PARTIAL  avgt   20  0.022 ± 0.003   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni          tinyint      PARTIAL  avgt   20  0.021 ± 0.004   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni         smallint      PARTIAL  avgt   20  0.093 ± 0.010   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni          integer      PARTIAL  avgt   20  0.123 ± 0.011   s/op
BenchmarkBatchStreamReadersWithZstd.readBlocksWithoutJni           bigint      PARTIAL  avgt   20  0.196 ± 0.011   s/op
  • Loading branch information
arhimondr committed Dec 12, 2019
1 parent 8c2dac4 commit 469637d
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ public static byte[] unpackByteNulls(byte[] values, boolean[] isNull)

int position = 0;
for (int i = 0; i < isNull.length; i++) {
result[i] = values[position];
if (!isNull[i]) {
result[i] = values[position];
position++;
}
}
Expand All @@ -65,8 +65,8 @@ public static short[] unpackShortNulls(short[] values, boolean[] isNull)

int position = 0;
for (int i = 0; i < isNull.length; i++) {
result[i] = values[position];
if (!isNull[i]) {
result[i] = values[position];
position++;
}
}
Expand All @@ -79,8 +79,8 @@ public static int[] unpackIntNulls(int[] values, boolean[] isNull)

int position = 0;
for (int i = 0; i < isNull.length; i++) {
result[i] = values[position];
if (!isNull[i]) {
result[i] = values[position];
position++;
}
}
Expand All @@ -93,8 +93,8 @@ public static long[] unpackLongNulls(long[] values, boolean[] isNull)

int position = 0;
for (int i = 0; i < isNull.length; i++) {
result[i] = values[position];
if (!isNull[i]) {
result[i] = values[position];
position++;
}
}
Expand Down
27 changes: 26 additions & 1 deletion presto-orc/src/test/java/com/facebook/presto/orc/OrcTester.java
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
Expand Down Expand Up @@ -799,8 +800,8 @@ public static void assertFileContentsPresto(
for (int i = 0; i < types.size(); i++) {
Type type = types.get(i);
Block block = page.getBlock(i);

assertEquals(block.getPositionCount(), positionCount);
checkNullValues(type, block);

List<Object> data = new ArrayList<>(positionCount);
for (int position = 0; position < positionCount; position++) {
Expand Down Expand Up @@ -893,6 +894,7 @@ else if (skipFirstBatch && isFirst) {
Type type = types.get(i);
Block block = recordReader.readBlock(i);
assertEquals(block.getPositionCount(), batchSize);
checkNullValues(type, block);

List<Object> data = new ArrayList<>(block.getPositionCount());
for (int position = 0; position < block.getPositionCount(); position++) {
Expand Down Expand Up @@ -1899,6 +1901,29 @@ else if (type.getTypeSignature().getBase().equals(StandardTypes.ROW)) {
throw new IllegalArgumentException("unsupported type: " + type);
}

private static void checkNullValues(Type type, Block block)
{
if (!block.mayHaveNull()) {
return;
}
for (int position = 0; position < block.getPositionCount(); position++) {
if (block.isNull(position)) {
if (type.equals(TINYINT) || type.equals(SMALLINT) || type.equals(INTEGER) || type.equals(BIGINT) || type.equals(REAL) || type.equals(DATE) || type.equals(TIMESTAMP)) {
assertEquals(type.getLong(block, position), 0);
}
if (type.equals(BOOLEAN)) {
assertFalse(type.getBoolean(block, position));
}
if (type.equals(DOUBLE)) {
assertEquals(type.getDouble(block, position), 0.0);
}
if (type instanceof VarcharType || type instanceof CharType || type.equals(VARBINARY)) {
assertEquals(type.getSlice(block, position).length(), 0);
}
}
}
}

private static void setDwrfLowMemoryFlag(RecordWriter recordWriter)
{
Object writer = getFieldValue(recordWriter, "writer");
Expand Down

0 comments on commit 469637d

Please sign in to comment.