From 0b1905e6dba42a077cc55fb30e8099f7b5ed3c10 Mon Sep 17 00:00:00 2001 From: Jiayu Liu Date: Fri, 18 Jun 2021 14:41:25 +0800 Subject: [PATCH 1/4] fix clippy warnings --- ballista/rust/core/src/serde/scheduler/mod.rs | 2 +- datafusion-cli/src/print_format.rs | 2 +- datafusion/src/execution/context.rs | 7 ++-- datafusion/src/logical_plan/dfschema.rs | 2 ++ datafusion/src/logical_plan/display.rs | 2 +- datafusion/src/optimizer/utils.rs | 2 +- .../src/physical_plan/expressions/not.rs | 2 +- datafusion/src/physical_plan/hash_join.rs | 3 +- .../src/physical_plan/regex_expressions.rs | 2 +- datafusion/src/scalar.rs | 4 +-- datafusion/src/sql/planner.rs | 32 +++++++++---------- 11 files changed, 29 insertions(+), 31 deletions(-) diff --git a/ballista/rust/core/src/serde/scheduler/mod.rs b/ballista/rust/core/src/serde/scheduler/mod.rs index b502c325595f..c9bd1e93db2c 100644 --- a/ballista/rust/core/src/serde/scheduler/mod.rs +++ b/ballista/rust/core/src/serde/scheduler/mod.rs @@ -142,7 +142,7 @@ impl PartitionStats { ] } - pub fn to_arrow_arrayref(&self) -> Result, BallistaError> { + pub fn to_arrow_arrayref(self) -> Result, BallistaError> { let mut field_builders = Vec::new(); let mut num_rows_builder = UInt64Builder::new(1); diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs index 34cf5e1f65e9..dadee4c7c844 100644 --- a/datafusion-cli/src/print_format.rs +++ b/datafusion-cli/src/print_format.rs @@ -151,7 +151,7 @@ mod tests { #[test] fn test_from_str_failure() { - assert_eq!(true, "pretty".parse::().is_err()); + assert!("pretty".parse::().is_err()); } #[test] diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 183524497940..105e5b1543cf 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -1125,7 +1125,7 @@ mod tests { let ctx = create_ctx(&tmp_dir, 1)?; let schema: Schema = ctx.table("test").unwrap().schema().clone().into(); - assert_eq!(schema.field_with_name("c1")?.is_nullable(), false); + assert!(!schema.field_with_name("c1")?.is_nullable()); let plan = LogicalPlanBuilder::scan_empty("", &schema, None)? .project(vec![col("c1")])? @@ -1133,10 +1133,7 @@ mod tests { let plan = ctx.optimize(&plan)?; let physical_plan = ctx.create_physical_plan(&Arc::new(plan))?; - assert_eq!( - physical_plan.schema().field_with_name("c1")?.is_nullable(), - false - ); + assert!(!physical_plan.schema().field_with_name("c1")?.is_nullable()); Ok(()) } diff --git a/datafusion/src/logical_plan/dfschema.rs b/datafusion/src/logical_plan/dfschema.rs index 5a9167e58b05..c5437b3af953 100644 --- a/datafusion/src/logical_plan/dfschema.rs +++ b/datafusion/src/logical_plan/dfschema.rs @@ -248,12 +248,14 @@ where } impl ToDFSchema for Schema { + #[allow(clippy::wrong_self_convention)] fn to_dfschema(self) -> Result { DFSchema::try_from(self) } } impl ToDFSchema for SchemaRef { + #[allow(clippy::wrong_self_convention)] fn to_dfschema(self) -> Result { // Attempt to use the Schema directly if there are no other // references, otherwise clone diff --git a/datafusion/src/logical_plan/display.rs b/datafusion/src/logical_plan/display.rs index f285534fdf1b..8178ef4484b2 100644 --- a/datafusion/src/logical_plan/display.rs +++ b/datafusion/src/logical_plan/display.rs @@ -197,7 +197,7 @@ impl<'a, 'b> PlanVisitor for GraphvizVisitor<'a, 'b> { // id [label="foo"] let label = if self.with_schema { format!( - "{}\\nSchema: {}", + r"{}\nSchema: {}", plan.display(), display_schema(&plan.schema().as_ref().to_owned().into()) ) diff --git a/datafusion/src/optimizer/utils.rs b/datafusion/src/optimizer/utils.rs index e707d30bc9ac..014ec74a0bfe 100644 --- a/datafusion/src/optimizer/utils.rs +++ b/datafusion/src/optimizer/utils.rs @@ -551,7 +551,7 @@ mod tests { stringified_plans, .. } => { - assert_eq!(*verbose, true); + assert!(*verbose); let expected_stringified_plans = vec![ StringifiedPlan::new(PlanType::LogicalPlan, "..."), diff --git a/datafusion/src/physical_plan/expressions/not.rs b/datafusion/src/physical_plan/expressions/not.rs index 23a1a46651de..7a997b61b488 100644 --- a/datafusion/src/physical_plan/expressions/not.rs +++ b/datafusion/src/physical_plan/expressions/not.rs @@ -129,7 +129,7 @@ mod tests { let expr = not(col("a"), &schema)?; assert_eq!(expr.data_type(&schema)?, DataType::Boolean); - assert_eq!(expr.nullable(&schema)?, true); + assert!(expr.nullable(&schema)?); let input = BooleanArray::from(vec![Some(true), None, Some(false)]); let expected = &BooleanArray::from(vec![Some(false), None, Some(true)]); diff --git a/datafusion/src/physical_plan/hash_join.rs b/datafusion/src/physical_plan/hash_join.rs index 644d2d486c85..928392a84433 100644 --- a/datafusion/src/physical_plan/hash_join.rs +++ b/datafusion/src/physical_plan/hash_join.rs @@ -684,11 +684,10 @@ fn build_join_indexes( &keys_values, )? { left_indices.append_value(i)?; - right_indices.append_value(row as u32)?; } else { left_indices.append_null()?; - right_indices.append_value(row as u32)?; } + right_indices.append_value(row as u32)?; } } None => { diff --git a/datafusion/src/physical_plan/regex_expressions.rs b/datafusion/src/physical_plan/regex_expressions.rs index b526e7259ef6..69b27ffb2662 100644 --- a/datafusion/src/physical_plan/regex_expressions.rs +++ b/datafusion/src/physical_plan/regex_expressions.rs @@ -62,7 +62,7 @@ pub fn regexp_match(args: &[ArrayRef]) -> Result String { lazy_static! { - static ref CAPTURE_GROUPS_RE: Regex = Regex::new("(\\\\)(\\d*)").unwrap(); + static ref CAPTURE_GROUPS_RE: Regex = Regex::new(r"(\\)(\d*)").unwrap(); } CAPTURE_GROUPS_RE .replace_all(replacement, "$${$2}") diff --git a/datafusion/src/scalar.rs b/datafusion/src/scalar.rs index 933bb8cebcb1..c7afbf55e367 100644 --- a/datafusion/src/scalar.rs +++ b/datafusion/src/scalar.rs @@ -1123,7 +1123,7 @@ mod tests { let array = value.to_array(); let array = array.as_any().downcast_ref::().unwrap(); assert_eq!(array.len(), 1); - assert_eq!(false, array.is_null(0)); + assert!(!array.is_null(0)); assert_eq!(array.value(0), 13); let value = ScalarValue::UInt64(None); @@ -1139,7 +1139,7 @@ mod tests { let array = value.to_array(); let array = array.as_any().downcast_ref::().unwrap(); assert_eq!(array.len(), 1); - assert_eq!(false, array.is_null(0)); + assert!(!array.is_null(0)); assert_eq!(array.value(0), 13); let value = ScalarValue::UInt32(None); diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index 4c1d8610dfdd..f06f231b9887 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -1653,7 +1653,7 @@ mod tests { let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( format!( - "Plan(\"Invalid identifier \\\'doesnotexist\\\' for schema {}\")", + r#"Plan("Invalid identifier \'doesnotexist\' for schema {}")"#, PERSON_COLUMN_NAMES ), format!("{:?}", err) @@ -1665,7 +1665,7 @@ mod tests { let sql = "SELECT age, age FROM person"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Plan(\"Projections require unique expression names but the expression \\\"#age\\\" at position 0 and \\\"#age\\\" at position 1 have the same name. Consider aliasing (\\\"AS\\\") one of them.\")", + r##"Plan("Projections require unique expression names but the expression \"#age\" at position 0 and \"#age\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##, format!("{:?}", err) ); } @@ -1675,7 +1675,7 @@ mod tests { let sql = "SELECT *, age FROM person"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Plan(\"Projections require unique expression names but the expression \\\"#age\\\" at position 3 and \\\"#age\\\" at position 8 have the same name. Consider aliasing (\\\"AS\\\") one of them.\")", + r##"Plan("Projections require unique expression names but the expression \"#age\" at position 3 and \"#age\" at position 8 have the same name. Consider aliasing (\"AS\") one of them.")"##, format!("{:?}", err) ); } @@ -1714,7 +1714,7 @@ mod tests { let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( format!( - "Plan(\"Invalid identifier \\\'doesnotexist\\\' for schema {}\")", + r#"Plan("Invalid identifier \'doesnotexist\' for schema {}")"#, PERSON_COLUMN_NAMES ), format!("{:?}", err) @@ -1727,7 +1727,7 @@ mod tests { let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( format!( - "Plan(\"Invalid identifier \\\'x\\\' for schema {}\")", + r#"Plan("Invalid identifier \'x\' for schema {}")"#, PERSON_COLUMN_NAMES ), format!("{:?}", err) @@ -2200,7 +2200,7 @@ mod tests { let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( format!( - "Plan(\"Invalid identifier \\\'doesnotexist\\\' for schema {}\")", + r#"Plan("Invalid identifier \'doesnotexist\' for schema {}")"#, PERSON_COLUMN_NAMES ), format!("{:?}", err) @@ -2212,7 +2212,7 @@ mod tests { let sql = "SELECT MIN(age), MIN(age) FROM person"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Plan(\"Projections require unique expression names but the expression \\\"#MIN(age)\\\" at position 0 and \\\"#MIN(age)\\\" at position 1 have the same name. Consider aliasing (\\\"AS\\\") one of them.\")", + r##"Plan("Projections require unique expression names but the expression \"#MIN(age)\" at position 0 and \"#MIN(age)\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##, format!("{:?}", err) ); } @@ -2242,7 +2242,7 @@ mod tests { let sql = "SELECT MIN(age) AS a, MIN(age) AS a FROM person"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Plan(\"Projections require unique expression names but the expression \\\"#MIN(age) AS a\\\" at position 0 and \\\"#MIN(age) AS a\\\" at position 1 have the same name. Consider aliasing (\\\"AS\\\") one of them.\")", + r##"Plan("Projections require unique expression names but the expression \"#MIN(age) AS a\" at position 0 and \"#MIN(age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##, format!("{:?}", err) ); } @@ -2272,7 +2272,7 @@ mod tests { let sql = "SELECT state AS a, MIN(age) AS a FROM person GROUP BY state"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Plan(\"Projections require unique expression names but the expression \\\"#state AS a\\\" at position 0 and \\\"#MIN(age) AS a\\\" at position 1 have the same name. Consider aliasing (\\\"AS\\\") one of them.\")", + r##"Plan("Projections require unique expression names but the expression \"#state AS a\" at position 0 and \"#MIN(age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##, format!("{:?}", err) ); } @@ -2293,7 +2293,7 @@ mod tests { let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( format!( - "Plan(\"Invalid identifier \\\'doesnotexist\\\' for schema {}\")", + r#"Plan("Invalid identifier \'doesnotexist\' for schema {}")"#, PERSON_COLUMN_NAMES ), format!("{:?}", err) @@ -2306,7 +2306,7 @@ mod tests { let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( format!( - "Plan(\"Invalid identifier \\\'doesnotexist\\\' for schema {}\")", + r#"Plan("Invalid identifier \'doesnotexist\' for schema {}")"#, PERSON_COLUMN_NAMES ), format!("{:?}", err) @@ -2318,7 +2318,7 @@ mod tests { let sql = "SELECT INTERVAL '100000000000000000 day'"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "NotImplemented(\"Interval field value out of range: \\\"100000000000000000 day\\\"\")", + r#"NotImplemented("Interval field value out of range: \"100000000000000000 day\"")"#, format!("{:?}", err) ); } @@ -2328,7 +2328,7 @@ mod tests { let sql = "SELECT INTERVAL '1 year 1 day'"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "NotImplemented(\"DF does not support intervals that have both a Year/Month part as well as Days/Hours/Mins/Seconds: \\\"1 year 1 day\\\". Hint: try breaking the interval into two parts, one with Year/Month and the other with Days/Hours/Mins/Seconds - e.g. (NOW() + INTERVAL \\\'1 year\\\') + INTERVAL \\\'1 day\\\'\")", + r#"NotImplemented("DF does not support intervals that have both a Year/Month part as well as Days/Hours/Mins/Seconds: \"1 year 1 day\". Hint: try breaking the interval into two parts, one with Year/Month and the other with Days/Hours/Mins/Seconds - e.g. (NOW() + INTERVAL '1 year') + INTERVAL '1 day'")"#, format!("{:?}", err) ); } @@ -2391,7 +2391,7 @@ mod tests { let sql = "SELECT state, MIN(age), MIN(age) FROM person GROUP BY state"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Plan(\"Projections require unique expression names but the expression \\\"#MIN(age)\\\" at position 1 and \\\"#MIN(age)\\\" at position 2 have the same name. Consider aliasing (\\\"AS\\\") one of them.\")", + r##"Plan("Projections require unique expression names but the expression \"#MIN(age)\" at position 1 and \"#MIN(age)\" at position 2 have the same name. Consider aliasing (\"AS\") one of them.")"##, format!("{:?}", err) ); } @@ -2451,7 +2451,7 @@ mod tests { "SELECT ((age + 1) / 2) * (age + 9), MIN(first_name) FROM person GROUP BY age + 1"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Plan(\"Projection references non-aggregate values\")", + r#"Plan("Projection references non-aggregate values")"#, format!("{:?}", err) ); } @@ -2462,7 +2462,7 @@ mod tests { let sql = "SELECT age, MIN(first_name) FROM person GROUP BY age + 1"; let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Plan(\"Projection references non-aggregate values\")", + r#"Plan("Projection references non-aggregate values")"#, format!("{:?}", err) ); } From 683bf61d9562375ea4f563bae80a9a8e7f0a985e Mon Sep 17 00:00:00 2001 From: Jiayu Liu Date: Fri, 18 Jun 2021 19:47:44 +0800 Subject: [PATCH 2/4] fix test case --- datafusion/src/sql/planner.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index f06f231b9887..547e9afd38d9 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -1653,7 +1653,7 @@ mod tests { let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( format!( - r#"Plan("Invalid identifier \'doesnotexist\' for schema {}")"#, + r#"Plan("Invalid identifier 'doesnotexist' for schema {}")"#, PERSON_COLUMN_NAMES ), format!("{:?}", err) @@ -1714,7 +1714,7 @@ mod tests { let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( format!( - r#"Plan("Invalid identifier \'doesnotexist\' for schema {}")"#, + r#"Plan("Invalid identifier 'doesnotexist' for schema {}")"#, PERSON_COLUMN_NAMES ), format!("{:?}", err) @@ -1727,7 +1727,7 @@ mod tests { let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( format!( - r#"Plan("Invalid identifier \'x\' for schema {}")"#, + r#"Plan("Invalid identifier 'x' for schema {}")"#, PERSON_COLUMN_NAMES ), format!("{:?}", err) @@ -2200,7 +2200,7 @@ mod tests { let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( format!( - r#"Plan("Invalid identifier \'doesnotexist\' for schema {}")"#, + r#"Plan("Invalid identifier 'doesnotexist' for schema {}")"#, PERSON_COLUMN_NAMES ), format!("{:?}", err) @@ -2293,7 +2293,7 @@ mod tests { let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( format!( - r#"Plan("Invalid identifier \'doesnotexist\' for schema {}")"#, + r#"Plan("Invalid identifier 'doesnotexist' for schema {}")"#, PERSON_COLUMN_NAMES ), format!("{:?}", err) @@ -2306,7 +2306,7 @@ mod tests { let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( format!( - r#"Plan("Invalid identifier \'doesnotexist\' for schema {}")"#, + r#"Plan("Invalid identifier 'doesnotexist' for schema {}")"#, PERSON_COLUMN_NAMES ), format!("{:?}", err) From 5a0f93dd6557f07d523e9c89f5ea7117d7a4e000 Mon Sep 17 00:00:00 2001 From: Jiayu Liu Date: Fri, 18 Jun 2021 20:23:50 +0800 Subject: [PATCH 3/4] upgrade tarpaulin --- .github/workflows/rust.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 933b51353d06..7da355fe3c16 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -349,9 +349,7 @@ jobs: export ARROW_TEST_DATA=$(pwd)/testing/data export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data - # 2020-11-15: There is a cargo-tarpaulin regression in 0.17.0 - # see https://github.com/xd009642/tarpaulin/issues/618 - cargo install --version 0.16.0 cargo-tarpaulin + cargo install --version 0.18.0-alpha3 cargo-tarpaulin cargo tarpaulin --out Xml env: CARGO_HOME: "/home/runner/.cargo" From ac7808ba2dea0fde5a1dce280529b71f819e80bc Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 18 Jun 2021 17:22:34 -0400 Subject: [PATCH 4/4] Disable coverage job while it is failing --- .github/workflows/rust.yml | 74 ++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 7da355fe3c16..7a2890c98b9f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -321,39 +321,43 @@ jobs: # Ignore MIRI errors until we can get a clean run cargo miri test || true - coverage: - name: Coverage - runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64] - rust: [stable] - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - name: Cache Cargo - uses: actions/cache@v2 - with: - path: /home/runner/.cargo - # this key is not equal because the user is different than on a container (runner vs github) - key: cargo-coverage-cache- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - path: /home/runner/target - # this key is not equal because coverage uses different compilation flags. - key: ${{ runner.os }}-${{ matrix.arch }}-target-coverage-cache-${{ matrix.rust }}- - - name: Run coverage - run: | - export ARROW_TEST_DATA=$(pwd)/testing/data - export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data +# Coverage job was failing. https://github.com/apache/arrow-datafusion/issues/590 tracks re-instating it - cargo install --version 0.18.0-alpha3 cargo-tarpaulin - cargo tarpaulin --out Xml - env: - CARGO_HOME: "/home/runner/.cargo" - CARGO_TARGET_DIR: "/home/runner/target" - - name: Report coverage - continue-on-error: true - run: bash <(curl -s https://codecov.io/bash) + # coverage: + # name: Coverage + # runs-on: ubuntu-latest + # strategy: + # matrix: + # arch: [amd64] + # rust: [stable] + # steps: + # - uses: actions/checkout@v2 + # with: + # submodules: true + # - name: Cache Cargo + # uses: actions/cache@v2 + # with: + # path: /home/runner/.cargo + # # this key is not equal because the user is different than on a container (runner vs github) + # key: cargo-coverage-cache- + # - name: Cache Rust dependencies + # uses: actions/cache@v2 + # with: + # path: /home/runner/target + # # this key is not equal because coverage uses different compilation flags. + # key: ${{ runner.os }}-${{ matrix.arch }}-target-coverage-cache-${{ matrix.rust }}- + # - name: Run coverage + # run: | + # export ARROW_TEST_DATA=$(pwd)/testing/data + # export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data + + # # 2020-11-15: There is a cargo-tarpaulin regression in 0.17.0 + # # see https://github.com/xd009642/tarpaulin/issues/618 + # cargo install --version 0.16.0 cargo-tarpaulin + # cargo tarpaulin --out Xml + # env: + # CARGO_HOME: "/home/runner/.cargo" + # CARGO_TARGET_DIR: "/home/runner/target" + # - name: Report coverage + # continue-on-error: true + # run: bash <(curl -s https://codecov.io/bash)