Skip to content

Commit

Permalink
Add checks for the cardinalities sum when validating an array column
Browse files Browse the repository at this point in the history
When an array column mismatches, also report the total number of
elements across all rows (the sum of the array cardinalities).
  • Loading branch information
ShenYi committed Jan 6, 2020
1 parent f1f01ce commit 48746be
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.facebook.presto.sql.tree.CoalesceExpression;
import com.facebook.presto.sql.tree.Expression;
import com.facebook.presto.sql.tree.FunctionCall;
import com.facebook.presto.sql.tree.LongLiteral;
import com.facebook.presto.sql.tree.QualifiedName;
import com.facebook.presto.sql.tree.SingleColumn;
import com.facebook.presto.sql.tree.TryExpression;
Expand Down Expand Up @@ -67,7 +68,15 @@ public List<SingleColumn> generateChecksumColumns(Column column)
checksum = new FunctionCall(QualifiedName.of("checksum"), ImmutableList.of(column.getIdentifier()));
}

return ImmutableList.of(new SingleColumn(checksum, Optional.of(delimitedIdentifier(getChecksumColumnAlias(column)))));
Expression arrayCardinalitySum = new CoalesceExpression(
new FunctionCall(
QualifiedName.of("sum"),
ImmutableList.of(new FunctionCall(QualifiedName.of("cardinality"), ImmutableList.of(column.getIdentifier())))),
new LongLiteral("0"));

return ImmutableList.of(
new SingleColumn(checksum, Optional.of(delimitedIdentifier(getChecksumColumnAlias(column)))),
new SingleColumn(arrayCardinalitySum, Optional.of(delimitedIdentifier(getCardinalitySumColumnAlias(column)))));
}

@Override
Expand All @@ -76,13 +85,28 @@ public ColumnMatchResult validate(Column column, ChecksumResult controlResult, C
String checksumColumnAlias = getChecksumColumnAlias(column);
Object controlChecksum = controlResult.getChecksum(checksumColumnAlias);
Object testChecksum = testResult.getChecksum(checksumColumnAlias);

String cardinalitySumColumnAlias = getCardinalitySumColumnAlias(column);
Object controlCardinalitySum = controlResult.getChecksum(cardinalitySumColumnAlias);
Object testCardinalitySum = testResult.getChecksum(cardinalitySumColumnAlias);

return new ColumnMatchResult(
Objects.equals(controlChecksum, testChecksum),
format("control(checksum: %s) test(checksum: %s)", controlChecksum, testChecksum));
Objects.equals(controlChecksum, testChecksum) && Objects.equals(controlCardinalitySum, testCardinalitySum),
format(
"control(checksum: %s, cardinality_sum: %s) test(checksum: %s, cardinality_sum: %s)",
controlChecksum,
controlCardinalitySum,
testChecksum,
testCardinalitySum));
}

/**
 * Builds the alias under which the checksum of {@code column} is selected and looked up:
 * the column's name followed by {@code _checksum}.
 *
 * @param column the column being validated
 * @return the checksum column alias for {@code column}
 */
private static String getChecksumColumnAlias(Column column)
{
    final String suffix = "_checksum";
    return column.getName() + suffix;
}

/**
 * Builds the alias under which the cardinality sum of {@code column} is selected and looked up:
 * the column's name followed by {@code _cardinality_sum}.
 *
 * @param column the column being validated
 * @return the cardinality-sum column alias for {@code column}
 */
private static String getCardinalitySumColumnAlias(Column column)
{
    final String suffix = "_cardinality_sum";
    return column.getName() + suffix;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,11 @@ public void testChecksumQuery()
", \"count\"(\"real\") FILTER (WHERE (\"real\" = \"infinity\"())) \"real_pos_inf_count\"\n" +
", \"count\"(\"real\") FILTER (WHERE (\"real\" = -\"infinity\"())) \"real_neg_inf_count\"\n" +
", \"checksum\"(\"array_sort\"(\"int_array\")) int_array_checksum\n" +
", COALESCE(\"sum\"(\"cardinality\"(\"int_array\")), 0) \"int_array_cardinality_sum\"" +
", COALESCE(\"checksum\"(TRY(\"array_sort\"(\"row_array\"))), \"checksum\"(\"row_array\")) \"row_array_checksum\"" +
", COALESCE(\"sum\"(\"cardinality\"(\"row_array\")), 0) \"row_array_cardinality_sum\"" +
", \"checksum\"(\"map_array\") \"map_array_checksum\"\n" +
", COALESCE(\"sum\"(\"cardinality\"(\"map_array\")), 0) \"map_array_cardinality_sum\"" +
"FROM\n" +
" test:di",
PARSING_OPTIONS);
Expand Down Expand Up @@ -285,24 +288,44 @@ public void testArray()
5,
ImmutableMap.<String, Object>builder()
.put("int_array_checksum", new SqlVarbinary(new byte[] {0xa}))
.put("int_array_cardinality_sum", 3L)
.put("map_array_checksum", new SqlVarbinary(new byte[] {0xb}))
.put("map_array_cardinality_sum", 7L)
.build());

// Matched
assertTrue(checksumValidator.getMismatchedColumns(columns, controlChecksum, controlChecksum).isEmpty());

// Mismatched
// Mismatched different elements
ChecksumResult testChecksum = new ChecksumResult(
5,
ImmutableMap.<String, Object>builder()
.put("int_array_checksum", new SqlVarbinary(new byte[] {0x1a}))
.put("int_array_cardinality_sum", 3L)
.put("map_array_checksum", new SqlVarbinary(new byte[] {0x1b}))
.put("map_array_cardinality_sum", 7L)
.build());
assertEquals(
checksumValidator.getMismatchedColumns(columns, controlChecksum, testChecksum),
ImmutableMap.builder()
.put(INT_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0a, cardinality_sum: 3) test(checksum: 1a, cardinality_sum: 3)"))
.put(MAP_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0b, cardinality_sum: 7) test(checksum: 1b, cardinality_sum: 7)"))
.build());

// Mismatched different cardinality sum
testChecksum = new ChecksumResult(
5,
ImmutableMap.<String, Object>builder()
.put("int_array_checksum", new SqlVarbinary(new byte[] {0x1a}))
.put("int_array_cardinality_sum", 2L)
.put("map_array_checksum", new SqlVarbinary(new byte[] {0x1b}))
.put("map_array_cardinality_sum", 5L)
.build());
assertEquals(
checksumValidator.getMismatchedColumns(columns, controlChecksum, testChecksum),
ImmutableMap.builder()
.put(INT_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0a) test(checksum: 1a)"))
.put(MAP_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0b) test(checksum: 1b)"))
.put(INT_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0a, cardinality_sum: 3) test(checksum: 1a, cardinality_sum: 2)"))
.put(MAP_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0b, cardinality_sum: 7) test(checksum: 1b, cardinality_sum: 5)"))
.build());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,9 @@ public void testArrayOfRow()
"COLUMN MISMATCH\n" +
"Control 1 rows, Test 1 rows\n" +
"Mismatched Columns:\n" +
" _col0 \\(array\\(row\\(integer, varchar\\(1\\)\\)\\)\\): control\\(checksum: 71 b5 2f 7f 1e 9b a6 a4\\) test\\(checksum: b4 3c 7d 02 2b 14 77 12\\)\n"));
" _col0 \\(array\\(row\\(integer, varchar\\(1\\)\\)\\)\\):" +
" control\\(checksum: 71 b5 2f 7f 1e 9b a6 a4, cardinality_sum: 2\\)" +
" test\\(checksum: b4 3c 7d 02 2b 14 77 12, cardinality_sum: 2\\)\n"));

List<DeterminismAnalysisRun> runs = event.get().getDeterminismAnalysisDetails().getRuns();
assertEquals(runs.size(), 2);
Expand Down

0 comments on commit 48746be

Please sign in to comment.