From 176e8f8a4db4ce50e02c0fb11da28702e52aeb89 Mon Sep 17 00:00:00 2001 From: Yi Shen Date: Fri, 3 Jan 2020 13:57:37 -0800 Subject: [PATCH] Add checks for the cardinalities sum when validating an array column For array column mismatch, add additional information of the total number of elements across all rows. --- .../checksum/ArrayColumnValidator.java | 30 +++++++++++++++++-- .../checksum/TestChecksumValidator.java | 29 ++++++++++++++++-- .../framework/TestDataVerification.java | 4 ++- 3 files changed, 56 insertions(+), 7 deletions(-) diff --git a/presto-verifier/src/main/java/com/facebook/presto/verifier/checksum/ArrayColumnValidator.java b/presto-verifier/src/main/java/com/facebook/presto/verifier/checksum/ArrayColumnValidator.java index fec36dec3a80..78071e72f9c3 100644 --- a/presto-verifier/src/main/java/com/facebook/presto/verifier/checksum/ArrayColumnValidator.java +++ b/presto-verifier/src/main/java/com/facebook/presto/verifier/checksum/ArrayColumnValidator.java @@ -19,6 +19,7 @@ import com.facebook.presto.sql.tree.CoalesceExpression; import com.facebook.presto.sql.tree.Expression; import com.facebook.presto.sql.tree.FunctionCall; +import com.facebook.presto.sql.tree.LongLiteral; import com.facebook.presto.sql.tree.QualifiedName; import com.facebook.presto.sql.tree.SingleColumn; import com.facebook.presto.sql.tree.TryExpression; @@ -67,7 +68,15 @@ public List generateChecksumColumns(Column column) checksum = new FunctionCall(QualifiedName.of("checksum"), ImmutableList.of(column.getIdentifier())); } - return ImmutableList.of(new SingleColumn(checksum, Optional.of(delimitedIdentifier(getChecksumColumnAlias(column))))); + Expression arrayCardinalitySum = new CoalesceExpression( + new FunctionCall( + QualifiedName.of("sum"), + ImmutableList.of(new FunctionCall(QualifiedName.of("cardinality"), ImmutableList.of(column.getIdentifier())))), + new LongLiteral("0")); + + return ImmutableList.of( + new SingleColumn(checksum, Optional.of(delimitedIdentifier(getChecksumColumnAlias(column)))), + new SingleColumn(arrayCardinalitySum, Optional.of(delimitedIdentifier(getCardinalitySumColumnAlias(column))))); } @Override @@ -76,13 +85,28 @@ public ColumnMatchResult validate(Column column, ChecksumResult controlResult, C String checksumColumnAlias = getChecksumColumnAlias(column); Object controlChecksum = controlResult.getChecksum(checksumColumnAlias); Object testChecksum = testResult.getChecksum(checksumColumnAlias); + + String cardinalitySumColumnAlias = getCardinalitySumColumnAlias(column); + Object controlCardinalitySum = controlResult.getChecksum(cardinalitySumColumnAlias); + Object testCardinalitySum = testResult.getChecksum(cardinalitySumColumnAlias); + return new ColumnMatchResult( - Objects.equals(controlChecksum, testChecksum), - format("control(checksum: %s) test(checksum: %s)", controlChecksum, testChecksum)); + Objects.equals(controlChecksum, testChecksum) && Objects.equals(controlCardinalitySum, testCardinalitySum), + format( + "control(checksum: %s, cardinality_sum: %s) test(checksum: %s, cardinality_sum: %s)", + controlChecksum, + controlCardinalitySum, + testChecksum, + testCardinalitySum)); } private static String getChecksumColumnAlias(Column column) { return column.getName() + "_checksum"; } + + private static String getCardinalitySumColumnAlias(Column column) + { + return column.getName() + "_cardinality_sum"; + } } diff --git a/presto-verifier/src/test/java/com/facebook/presto/verifier/checksum/TestChecksumValidator.java b/presto-verifier/src/test/java/com/facebook/presto/verifier/checksum/TestChecksumValidator.java index b15fb011fc3c..1b11b425af34 100644 --- a/presto-verifier/src/test/java/com/facebook/presto/verifier/checksum/TestChecksumValidator.java +++ b/presto-verifier/src/test/java/com/facebook/presto/verifier/checksum/TestChecksumValidator.java @@ -111,8 +111,11 @@ public void testChecksumQuery() ", \"count\"(\"real\") FILTER (WHERE (\"real\" = \"infinity\"())) \"real_pos_inf_count\"\n" + ", \"count\"(\"real\") FILTER (WHERE (\"real\" = -\"infinity\"())) \"real_neg_inf_count\"\n" + ", \"checksum\"(\"array_sort\"(\"int_array\")) int_array_checksum\n" + + ", COALESCE(\"sum\"(\"cardinality\"(\"int_array\")), 0) \"int_array_cardinality_sum\"" + ", COALESCE(\"checksum\"(TRY(\"array_sort\"(\"row_array\"))), \"checksum\"(\"row_array\")) \"row_array_checksum\"" + + ", COALESCE(\"sum\"(\"cardinality\"(\"row_array\")), 0) \"row_array_cardinality_sum\"" + ", \"checksum\"(\"map_array\") \"map_array_checksum\"\n" + + ", COALESCE(\"sum\"(\"cardinality\"(\"map_array\")), 0) \"map_array_cardinality_sum\"" + "FROM\n" + " test:di", PARSING_OPTIONS); @@ -285,24 +288,44 @@ public void testArray() 5, ImmutableMap.builder() .put("int_array_checksum", new SqlVarbinary(new byte[] {0xa})) + .put("int_array_cardinality_sum", 3L) .put("map_array_checksum", new SqlVarbinary(new byte[] {0xb})) + .put("map_array_cardinality_sum", 7L) .build()); // Matched assertTrue(checksumValidator.getMismatchedColumns(columns, controlChecksum, controlChecksum).isEmpty()); - // Mismatched + // Mismatched different elements ChecksumResult testChecksum = new ChecksumResult( 5, ImmutableMap.builder() .put("int_array_checksum", new SqlVarbinary(new byte[] {0x1a})) + .put("int_array_cardinality_sum", 3L) + .put("map_array_checksum", new SqlVarbinary(new byte[] {0x1b})) + .put("map_array_cardinality_sum", 7L) + .build()); + assertEquals( + checksumValidator.getMismatchedColumns(columns, controlChecksum, testChecksum), + ImmutableMap.builder() + .put(INT_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0a, cardinality_sum: 3) test(checksum: 1a, cardinality_sum: 3)")) + .put(MAP_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0b, cardinality_sum: 7) test(checksum: 1b, cardinality_sum: 7)")) + .build()); + + // Mismatched different cardinality sum + testChecksum = new ChecksumResult( + 5, + ImmutableMap.builder() + .put("int_array_checksum", new SqlVarbinary(new byte[] {0x1a})) + .put("int_array_cardinality_sum", 2L) .put("map_array_checksum", new SqlVarbinary(new byte[] {0x1b})) + .put("map_array_cardinality_sum", 5L) .build()); assertEquals( checksumValidator.getMismatchedColumns(columns, controlChecksum, testChecksum), ImmutableMap.builder() - .put(INT_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0a) test(checksum: 1a)")) - .put(MAP_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0b) test(checksum: 1b)")) + .put(INT_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0a, cardinality_sum: 3) test(checksum: 1a, cardinality_sum: 2)")) + .put(MAP_ARRAY_COLUMN, new ColumnMatchResult(false, "control(checksum: 0b, cardinality_sum: 7) test(checksum: 1b, cardinality_sum: 5)")) .build()); } } diff --git a/presto-verifier/src/test/java/com/facebook/presto/verifier/framework/TestDataVerification.java b/presto-verifier/src/test/java/com/facebook/presto/verifier/framework/TestDataVerification.java index f736cec7989b..37b5b5086758 100644 --- a/presto-verifier/src/test/java/com/facebook/presto/verifier/framework/TestDataVerification.java +++ b/presto-verifier/src/test/java/com/facebook/presto/verifier/framework/TestDataVerification.java @@ -263,7 +263,9 @@ public void testArrayOfRow() "COLUMN MISMATCH\n" + "Control 1 rows, Test 1 rows\n" + "Mismatched Columns:\n" + - " _col0 \\(array\\(row\\(integer, varchar\\(1\\)\\)\\)\\): control\\(checksum: 71 b5 2f 7f 1e 9b a6 a4\\) test\\(checksum: b4 3c 7d 02 2b 14 77 12\\)\n")); + " _col0 \\(array\\(row\\(integer, varchar\\(1\\)\\)\\)\\):" + + " control\\(checksum: 71 b5 2f 7f 1e 9b a6 a4, cardinality_sum: 2\\)" + + " test\\(checksum: b4 3c 7d 02 2b 14 77 12, cardinality_sum: 2\\)\n")); List runs = event.get().getDeterminismAnalysisDetails().getRuns(); assertEquals(runs.size(), 2);