Skip to content

Commit

Permalink
Add checks for the cardinalities sum when validating an array column
Browse files Browse the repository at this point in the history
When an array column mismatches, also report the total number of
elements (the sum of array cardinalities) across all rows.
  • Loading branch information
ShenYi authored and mbasmanova committed Jan 7, 2020
1 parent ad99885 commit 176e8f8
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.facebook.presto.sql.tree.CoalesceExpression;
import com.facebook.presto.sql.tree.Expression;
import com.facebook.presto.sql.tree.FunctionCall;
import com.facebook.presto.sql.tree.LongLiteral;
import com.facebook.presto.sql.tree.QualifiedName;
import com.facebook.presto.sql.tree.SingleColumn;
import com.facebook.presto.sql.tree.TryExpression;
Expand Down Expand Up @@ -67,7 +68,15 @@ public List<SingleColumn> generateChecksumColumns(Column column)
checksum = new FunctionCall(QualifiedName.of("checksum"), ImmutableList.of(column.getIdentifier()));
}

return ImmutableList.of(new SingleColumn(checksum, Optional.of(delimitedIdentifier(getChecksumColumnAlias(column)))));
Expression arrayCardinalitySum = new CoalesceExpression(
new FunctionCall(
QualifiedName.of("sum"),
ImmutableList.of(new FunctionCall(QualifiedName.of("cardinality"), ImmutableList.of(column.getIdentifier())))),
new LongLiteral("0"));

return ImmutableList.of(
new SingleColumn(checksum, Optional.of(delimitedIdentifier(getChecksumColumnAlias(column)))),
new SingleColumn(arrayCardinalitySum, Optional.of(delimitedIdentifier(getCardinalitySumColumnAlias(column)))));
}

@Override
Expand All @@ -76,13 +85,28 @@ public ColumnMatchResult validate(Column column, ChecksumResult controlResult, C
String checksumColumnAlias = getChecksumColumnAlias(column);
Object controlChecksum = controlResult.getChecksum(checksumColumnAlias);
Object testChecksum = testResult.getChecksum(checksumColumnAlias);

String cardinalitySumColumnAlias = getCardinalitySumColumnAlias(column);
Object controlCardinalitySum = controlResult.getChecksum(cardinalitySumColumnAlias);
Object testCardinalitySum = testResult.getChecksum(cardinalitySumColumnAlias);

return new ColumnMatchResult(
Objects.equals(controlChecksum, testChecksum),
format("control(checksum: %s) test(checksum: %s)", controlChecksum, testChecksum));
Objects.equals(controlChecksum, testChecksum) && Objects.equals(controlCardinalitySum, testCardinalitySum),
format(
"control(checksum: %s, cardinality_sum: %s) test(checksum: %s, cardinality_sum: %s)",
controlChecksum,
controlCardinalitySum,
testChecksum,
testCardinalitySum));
}

private static String getChecksumColumnAlias(Column column)
{
return column.getName() + "_checksum";
}

private static String getCardinalitySumColumnAlias(Column column)
{
return column.getName() + "_cardinality_sum";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,11 @@ public void testChecksumQuery()
", \"count\"(\"real\") FILTER (WHERE (\"real\" = \"infinity\"())) \"real_pos_inf_count\"\n" +
", \"count\"(\"real\") FILTER (WHERE (\"real\" = -\"infinity\"())) \"real_neg_inf_count\"\n" +
", \"checksum\"(\"array_sort\"(\"int_array\")) int_array_checksum\n" +
", COALESCE(\"sum\"(\"cardinality\"(\"int_array\")), 0) \"int_array_cardinality_sum\"" +
", COALESCE(\"checksum\"(TRY(\"array_sort\"(\"row_array\"))), \"checksum\"(\"row_array\")) \"row_array_checksum\"" +
", COALESCE(\"sum\"(\"cardinality\"(\"row_array\")), 0) \"row_array_cardinality_sum\"" +
", \"checksum\"(\"map_array\") \"map_array_checksum\"\n" +
", COALESCE(\"sum\"(\"cardinality\"(\"map_array\")), 0) \"map_array_cardinality_sum\"" +
"FROM\n" +
" test:di",
PARSING_OPTIONS);
Expand Down Expand Up @@ -285,24 +288,44 @@ public void testArray()
5,
ImmutableMap.<String, Object>builder()
.put("int_array_checksum", new SqlVarbinary(new byte[] {0xa}))
.put("int_array_cardinality_sum", 3L)
.put("map_array_checksum", new SqlVarbinary(new byte[] {0xb}))
.put("map_array_cardinality_sum", 7L)
.build());

// Matched
assertTrue(checksumValidator.getMismatchedColumns(columns, controlChecksum, controlChecksum).isEmpty());

// Mismatched
// Mismatched different elements
ChecksumResult testChecksum = new ChecksumResult(
5,
ImmutableMap.<String, Object>builder()
.put("int_array_checksum", new SqlVarbinary(new byte[] {0x1a}))
.put("int_array_cardinality_sum", 3L)
.put("map_array_checksum", new SqlVarbinary(new byte[] {0x1b}))
.put("map_array_cardinality_sum", 7L)
.build());
assertEquals(
checksumValidator.getMismatchedColumns(columns, controlChecksum, testChecksum),
ImmutableMap.builder()
.put(INT_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0a, cardinality_sum: 3) test(checksum: 1a, cardinality_sum: 3)"))
.put(MAP_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0b, cardinality_sum: 7) test(checksum: 1b, cardinality_sum: 7)"))
.build());

// Mismatched different cardinality sum
testChecksum = new ChecksumResult(
5,
ImmutableMap.<String, Object>builder()
.put("int_array_checksum", new SqlVarbinary(new byte[] {0x1a}))
.put("int_array_cardinality_sum", 2L)
.put("map_array_checksum", new SqlVarbinary(new byte[] {0x1b}))
.put("map_array_cardinality_sum", 5L)
.build());
assertEquals(
checksumValidator.getMismatchedColumns(columns, controlChecksum, testChecksum),
ImmutableMap.builder()
.put(INT_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0a) test(checksum: 1a)"))
.put(MAP_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0b) test(checksum: 1b)"))
.put(INT_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0a, cardinality_sum: 3) test(checksum: 1a, cardinality_sum: 2)"))
.put(MAP_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0b, cardinality_sum: 7) test(checksum: 1b, cardinality_sum: 5)"))
.build());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,9 @@ public void testArrayOfRow()
"COLUMN MISMATCH\n" +
"Control 1 rows, Test 1 rows\n" +
"Mismatched Columns:\n" +
" _col0 \\(array\\(row\\(integer, varchar\\(1\\)\\)\\)\\): control\\(checksum: 71 b5 2f 7f 1e 9b a6 a4\\) test\\(checksum: b4 3c 7d 02 2b 14 77 12\\)\n"));
" _col0 \\(array\\(row\\(integer, varchar\\(1\\)\\)\\)\\):" +
" control\\(checksum: 71 b5 2f 7f 1e 9b a6 a4, cardinality_sum: 2\\)" +
" test\\(checksum: b4 3c 7d 02 2b 14 77 12, cardinality_sum: 2\\)\n"));

List<DeterminismAnalysisRun> runs = event.get().getDeterminismAnalysisDetails().getRuns();
assertEquals(runs.size(), 2);
Expand Down

0 comments on commit 176e8f8

Please sign in to comment.