Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor: Move some aggregate error tests to sqllogictests #5055

Merged
merged 3 commits into from
Jan 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 0 additions & 114 deletions datafusion/core/tests/sql/aggregates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,120 +20,6 @@ use datafusion::scalar::ScalarValue;
use datafusion::test_util::scan_empty;
use datafusion_common::cast::as_float64_array;

/// Runs `approx_distinct` over `c9` (raw and cast to varchar) against
/// `aggregate_test_100` and compares the formatted batches with the
/// expected table. Currently ignored; see the linked issue.
#[tokio::test]
#[ignore] // https://github.com/apache/arrow-datafusion/issues/3353
async fn csv_query_approx_count() -> Result<()> {
    let ctx = SessionContext::new();
    register_aggregate_csv(&ctx).await?;

    let batches = execute_to_batches(
        &ctx,
        "SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100",
    )
    .await;

    let expected_table = vec![
        "+----------+--------------+",
        "| count_c9 | count_c9_str |",
        "+----------+--------------+",
        "| 100 | 99 |",
        "+----------+--------------+",
    ];
    assert_batches_eq!(expected_table, &batches);

    Ok(())
}

/// Verifies the error text produced when `approx_percentile_cont_with_weight`
/// is called with a Utf8 column as the value, weight, or percentile argument.
#[tokio::test]
async fn csv_query_approx_percentile_cont_with_weight() -> Result<()> {
    let ctx = SessionContext::new();
    register_aggregate_csv(&ctx).await?;

    // (query, expected error message), checked in order.
    let cases = [
        (
            "SELECT approx_percentile_cont_with_weight(c1, c2, 0.95) FROM aggregate_test_100",
            "Error during planning: The function ApproxPercentileContWithWeight does not support inputs of type Utf8.",
        ),
        (
            "SELECT approx_percentile_cont_with_weight(c3, c1, 0.95) FROM aggregate_test_100",
            "Error during planning: The weight argument for ApproxPercentileContWithWeight does not support inputs of type Utf8.",
        ),
        (
            "SELECT approx_percentile_cont_with_weight(c3, c2, c1) FROM aggregate_test_100",
            "Error during planning: The percentile argument for ApproxPercentileContWithWeight must be Float64, not Utf8.",
        ),
    ];

    for (sql, expected_message) in cases {
        // Each query must fail; `unwrap_err` panics if it unexpectedly succeeds.
        let err = plan_and_collect(&ctx, sql).await.unwrap_err();
        assert_eq!(err.to_string(), expected_message);
    }

    Ok(())
}

/// Verifies the error text produced when the third argument of
/// `approx_percentile_cont` is a negative literal, a Utf8 column, or a
/// Float64 literal.
#[tokio::test]
async fn csv_query_approx_percentile_cont_with_histogram_bins() -> Result<()> {
    let ctx = SessionContext::new();
    register_aggregate_csv(&ctx).await?;

    // (query, expected error message), checked in order.
    let cases = [
        (
            "SELECT c1, approx_percentile_cont(c3, 0.95, -1000) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1",
            "This feature is not implemented: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be UInt > 0 literal (got data type Int64).",
        ),
        (
            "SELECT approx_percentile_cont(c3, 0.95, c1) FROM aggregate_test_100",
            "Error during planning: The percentile sample points count for ApproxPercentileCont must be integer, not Utf8.",
        ),
        (
            "SELECT approx_percentile_cont(c3, 0.95, 111.1) FROM aggregate_test_100",
            "Error during planning: The percentile sample points count for ApproxPercentileCont must be integer, not Float64.",
        ),
    ];

    for (sql, expected_message) in cases {
        // Each query must fail; `unwrap_err` panics if it unexpectedly succeeds.
        let err = plan_and_collect(&ctx, sql).await.unwrap_err();
        assert_eq!(err.to_string(), expected_message);
    }

    Ok(())
}

/// Verifies that `ORDER BY` and `LIMIT` inside `array_agg(...)` are rejected
/// with the expected "not implemented" messages.
#[tokio::test]
async fn csv_query_array_agg_unsupported() -> Result<()> {
    let ctx = SessionContext::new();
    register_aggregate_csv(&ctx).await?;

    // ORDER BY inside the aggregate call.
    let order_by_sql = "SELECT array_agg(c13 ORDER BY c1) FROM aggregate_test_100";
    let order_by_message = plan_and_collect(&ctx, order_by_sql)
        .await
        .unwrap_err()
        .to_string();
    assert_eq!(
        order_by_message,
        "This feature is not implemented: ORDER BY not supported in ARRAY_AGG: c1"
    );

    // LIMIT inside the aggregate call.
    let limit_sql = "SELECT array_agg(c13 LIMIT 1) FROM aggregate_test_100";
    let limit_message = plan_and_collect(&ctx, limit_sql)
        .await
        .unwrap_err()
        .to_string();
    assert_eq!(
        limit_message,
        "This feature is not implemented: LIMIT not supported in ARRAY_AGG: 1"
    );

    Ok(())
}

#[tokio::test]
async fn csv_query_array_agg_distinct() -> Result<()> {
let ctx = SessionContext::new();
Expand Down
35 changes: 35 additions & 0 deletions datafusion/core/tests/sqllogictests/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,41 @@ STORED AS CSV
WITH HEADER ROW
LOCATION '../../testing/data/csv/aggregate_test_100.csv'

#######
# Error tests
#######

# csv_query_approx_count
# This query is currently expected to fail; the record pins the error until
# https://github.com/apache/arrow-datafusion/issues/3353 is resolved.
statement error Aggregations require unique expression names
SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100

# csv_query_approx_percentile_cont_with_weight
Copy link
Contributor

@korowa korowa Jan 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like a duplicated block, with minor differences such as the double space on line 51 and the dot at the end of line 48.

Copy link
Contributor

@korowa korowa Jan 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It also seems that each of these 3 error messages should end with a dot, though sqllogictests appears to completely ignore the actual error message - even

statement error just some random text here
SELECT approx_percentile_cont_with_weight(c1, c2, 0.95) FROM aggregate_test_100

counts as passed test 🤔

Copy link
Contributor Author

@alamb alamb Jan 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call with duplicated block. Thank you 🙏

As for the error messages, I am not sure what you mean by "though sqllogictests completely ignores actual error message - even". Can you please tell me what you did to observe this behavior?

I tried changing the expected error message like this

diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index 3b527dcd8..6931018da 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -56,7 +56,7 @@ SELECT approx_percentile_cont_with_weight(c3, c2, c1) FROM aggregate_test_100
 
 
 # csv_query_approx_percentile_cont_with_weight
-statement error Error during planning: The function ApproxPercentileContWithWeight does not support inputs of type Utf8.
+statement error some random text
 SELECT approx_percentile_cont_with_weight(c1, c2, 0.95) FROM aggregate_test_100
 
 statement error Error during planning: The weight argument for ApproxPercentileContWithWeight does not support inputs of type Utf8

And then test failed, as I expected:

cargo test --test sqllogictests
....
STORED AS CSV
WITH HEADER ROW
LOCATION '../../testing/data/csv/aggregate_test_100.csv'"
[aggregate.slt] Running query: "SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100"
[aggregate.slt] Running query: "SELECT approx_percentile_cont_with_weight(c1, c2, 0.95) FROM aggregate_test_100"
[aggregate.slt] Running query: "SELECT approx_percentile_cont_with_weight(c3, c1, 0.95) FROM aggregate_test_100"
[aggregate.slt] Running query: "SELECT approx_percentile_cont_with_weight(c3, c2, c1) FROM aggregate_test_100"
[aggregate.slt] Running query: "SELECT approx_percentile_cont_with_weight(c1, c2, 0.95) FROM aggregate_test_100"
Error: statement is expected to fail with error:
	some random text
but got error:
	DataFusion error: Error during planning: The function ApproxPercentileContWithWeight does not support inputs of type Utf8.
[SQL] SELECT approx_percentile_cont_with_weight(c1, c2, 0.95) FROM aggregate_test_100
at tests/sqllogictests/test_files/aggregate.slt:59

error: test failed, to rerun pass `-p datafusion --test sqllogictests`

🤔

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I should have checked this more carefully - it was an unsaved version of the file.

Now I see that the test passes if the expected message is a leading substring of the actual SQL execution error, which explains why a missing dot at the end of the string doesn't affect the test results.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for double checking @korowa

# Each expected message is written out in full, including the trailing period.
statement error Error during planning: The function ApproxPercentileContWithWeight does not support inputs of type Utf8.
SELECT approx_percentile_cont_with_weight(c1, c2, 0.95) FROM aggregate_test_100

statement error Error during planning: The weight argument for ApproxPercentileContWithWeight does not support inputs of type Utf8.
SELECT approx_percentile_cont_with_weight(c3, c1, 0.95) FROM aggregate_test_100

statement error Error during planning: The percentile argument for ApproxPercentileContWithWeight must be Float64, not Utf8.
SELECT approx_percentile_cont_with_weight(c3, c2, c1) FROM aggregate_test_100

# csv_query_approx_percentile_cont_with_histogram_bins
# Invalid third argument to approx_percentile_cont: negative literal, Utf8 column, Float64 literal.
# NOTE(review): the parentheses below are backslash-escaped, presumably because the
# expected text is interpreted as a regular expression — confirm against the runner.
statement error This feature is not implemented: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be UInt > 0 literal \(got data type Int64\).
SELECT c1, approx_percentile_cont(c3, 0.95, -1000) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1

statement error Error during planning: The percentile sample points count for ApproxPercentileCont must be integer, not Utf8.
SELECT approx_percentile_cont(c3, 0.95, c1) FROM aggregate_test_100

statement error Error during planning: The percentile sample points count for ApproxPercentileCont must be integer, not Float64.
SELECT approx_percentile_cont(c3, 0.95, 111.1) FROM aggregate_test_100

# csv_query_array_agg_unsupported
# ORDER BY and LIMIT inside array_agg(...) are expected to be rejected.
statement error This feature is not implemented: ORDER BY not supported in ARRAY_AGG: c1
SELECT array_agg(c13 ORDER BY c1) FROM aggregate_test_100

statement error This feature is not implemented: LIMIT not supported in ARRAY_AGG: 1
SELECT array_agg(c13 LIMIT 1) FROM aggregate_test_100


# FIX: custom absolute values
# csv_query_avg_multi_batch
Expand Down