From be4a817cd31e01da37836eb14f5bbc6bff84cc7b Mon Sep 17 00:00:00 2001 From: Zeddicus414 Date: Sat, 11 Feb 2023 22:19:46 -0600 Subject: [PATCH 1/8] Add PD002 use-of-inplace-argument documentation --- README.md | 2 +- .../pandas_vet/rules/inplace_argument.rs | 20 ++++++++++++++ docs/rules/use-of-inplace-argument.md | 26 +++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 docs/rules/use-of-inplace-argument.md diff --git a/README.md b/README.md index cbb4efee7e2b3..3f3fae5d702c6 100644 --- a/README.md +++ b/README.md @@ -1320,7 +1320,7 @@ For more, see [pandas-vet](https://pypi.org/project/pandas-vet/) on PyPI. | Code | Name | Message | Fix | | ---- | ---- | ------- | --- | -| PD002 | use-of-inplace-argument | `inplace=True` should be avoided; it has inconsistent behavior | 🛠 | +| PD002 | [use-of-inplace-argument](https://github.com/charliermarsh/ruff/blob/main/docs/rules/use-of-inplace-argument.md) | `inplace=True` should be avoided; it has inconsistent behavior | 🛠 | | PD003 | use-of-dot-is-null | `.isna` is preferred to `.isnull`; functionality is equivalent | | | PD004 | use-of-dot-not-null | `.notna` is preferred to `.notnull`; functionality is equivalent | | | PD007 | use-of-dot-ix | `.ix` is deprecated; use more explicit `.loc` or `.iloc` | | diff --git a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs index 8c48138baed10..091b0411bace1 100644 --- a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs +++ b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs @@ -8,6 +8,26 @@ use crate::rules::pandas_vet::fixes::fix_inplace_argument; use crate::violation::AlwaysAutofixableViolation; define_violation!( + ///## What it does + ///Checks for `inplace=True` inside `pandas` code. + /// + ///## Why is this bad? + ///- Many people expect `inplace=True` to be a performance benefit that prevents dataframe copies, but that's often not true. + ///- It encourages mutation rather than immutable data, which is harder to reason about and may cause bugs. + ///- It removes the ability to use the chaining style for `pandas` code. + /// + ///## Example + ///```python + ///df.sort_values("col1", inplace=True) + ///``` + /// + ///Use instead: + ///```python + ///sorted_df = df.sort_values("col1") + ///``` + /// + ///## References + ///- [Why You Should Probably Never Use pandas inplace=True](https://towardsdatascience.com/why-you-should-probably-never-use-pandas-inplace-true-9f9f211849e4) pub struct UseOfInplaceArgument; ); impl AlwaysAutofixableViolation for UseOfInplaceArgument { diff --git a/docs/rules/use-of-inplace-argument.md b/docs/rules/use-of-inplace-argument.md new file mode 100644 index 0000000000000..ed79b139d6168 --- /dev/null +++ b/docs/rules/use-of-inplace-argument.md @@ -0,0 +1,26 @@ +# use-of-inplace-argument (PD002) + +Derived from the **pandas-vet** linter. + +Autofix is always available. + +## What it does +Checks for `inplace=True` inside `pandas` code. + +## Why is this bad? +- Many people expect `inplace=True` to be a performance benefit that prevents dataframe copies, but that's often not true. +- It encourages mutation rather than immutable data, which is harder to reason about and may cause bugs. +- It removes the ability to use the chaining style for `pandas` code. + +## Example +```python +df.sort_values("col1", inplace=True) +``` + +Use instead: +```python +sorted_df = df.sort_values("col1") +``` + +## References +- [Why You Should Probably Never Use pandas inplace=True](https://towardsdatascience.com/why-you-should-probably-never-use-pandas-inplace-true-9f9f211849e4) \ No newline at end of file From e5721c9923d23f0e3d7324089a0df35a18f09a47 Mon Sep 17 00:00:00 2001 From: Zeddicus414 Date: Sat, 11 Feb 2023 22:32:16 -0600 Subject: [PATCH 2/8] Fix formatting --- .../pandas_vet/rules/inplace_argument.rs | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs index 091b0411bace1..28770f4f4fe50 100644 --- a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs +++ b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs @@ -8,26 +8,26 @@ use crate::rules::pandas_vet::fixes::fix_inplace_argument; use crate::violation::AlwaysAutofixableViolation; define_violation!( - ///## What it does - ///Checks for `inplace=True` inside `pandas` code. - /// - ///## Why is this bad? - ///- Many people expect `inplace=True` to be a performance benefit that prevents dataframe copies, but that's often not true. - ///- It encourages mutation rather than immutable data, which is harder to reason about and may cause bugs. - ///- It removes the ability to use the chaining style for `pandas` code. - /// - ///## Example - ///```python - ///df.sort_values("col1", inplace=True) - ///``` - /// - ///Use instead: - ///```python - ///sorted_df = df.sort_values("col1") - ///``` - /// - ///## References - ///- [Why You Should Probably Never Use pandas inplace=True](https://towardsdatascience.com/why-you-should-probably-never-use-pandas-inplace-true-9f9f211849e4) + /// ## What it does + /// Checks for `inplace=True` inside `pandas` code. + /// + /// ## Why is this bad? + /// - Many people expect `inplace=True` to be a performance benefit that prevents dataframe copies, but that's often not true. + /// - It encourages mutation rather than immutable data, which is harder to reason about and may cause bugs. + /// - It removes the ability to use the chaining style for `pandas` code. + /// + /// ## Example + /// ```python + /// df.sort_values("col1", inplace=True) + /// ``` + /// + /// Use instead: + /// ```python + /// sorted_df = df.sort_values("col1") + /// ``` + /// + /// ## References + /// - [Why You Should Probably Never Use pandas inplace=True](https://towardsdatascience.com/why-you-should-probably-never-use-pandas-inplace-true-9f9f211849e4) pub struct UseOfInplaceArgument; ); impl AlwaysAutofixableViolation for UseOfInplaceArgument { From a170efa725b0ad4d2a1df045713ff98c1819d470 Mon Sep 17 00:00:00 2001 From: Zeddicus414 Date: Sat, 11 Feb 2023 22:35:18 -0600 Subject: [PATCH 3/8] Update wording for clarity --- crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs index 28770f4f4fe50..65f295e887f5d 100644 --- a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs +++ b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs @@ -9,7 +9,7 @@ use crate::violation::AlwaysAutofixableViolation; define_violation!( /// ## What it does - /// Checks for `inplace=True` inside `pandas` code. + /// Checks for `inplace=True` in code using the `pandas` library. /// /// ## Why is this bad? /// - Many people expect `inplace=True` to be a performance benefit that prevents dataframe copies, but that's often not true. From bd4924b39b65beaf1f9538d2f1444262ad08ea61 Mon Sep 17 00:00:00 2001 From: Zeddicus414 Date: Sat, 11 Feb 2023 22:56:16 -0600 Subject: [PATCH 4/8] Fix trailing spaces --- .../ruff/src/rules/pandas_vet/rules/inplace_argument.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs index 65f295e887f5d..3a7c33aee7eff 100644 --- a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs +++ b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs @@ -10,22 +10,22 @@ use crate::violation::AlwaysAutofixableViolation; define_violation!( /// ## What it does /// Checks for `inplace=True` in code using the `pandas` library. - /// + /// /// ## Why is this bad? /// - Many people expect `inplace=True` to be a performance benefit that prevents dataframe copies, but that's often not true. /// - It encourages mutation rather than immutable data, which is harder to reason about and may cause bugs. /// - It removes the ability to use the chaining style for `pandas` code. - /// + /// /// ## Example /// ```python /// df.sort_values("col1", inplace=True) /// ``` - /// + /// /// Use instead: /// ```python /// sorted_df = df.sort_values("col1") /// ``` - /// + /// /// ## References /// - [Why You Should Probably Never Use pandas inplace=True](https://towardsdatascience.com/why-you-should-probably-never-use-pandas-inplace-true-9f9f211849e4) pub struct UseOfInplaceArgument; From 4ef66a2db524e631ea05457edeb0d79b0fe9a532 Mon Sep 17 00:00:00 2001 From: Zeddicus414 Date: Sat, 11 Feb 2023 22:56:38 -0600 Subject: [PATCH 5/8] Update wording in generated md file. --- docs/rules/use-of-inplace-argument.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/rules/use-of-inplace-argument.md b/docs/rules/use-of-inplace-argument.md index ed79b139d6168..431104b5b8d57 100644 --- a/docs/rules/use-of-inplace-argument.md +++ b/docs/rules/use-of-inplace-argument.md @@ -5,7 +5,7 @@ Derived from the **pandas-vet** linter. Autofix is always available. ## What it does -Checks for `inplace=True` inside `pandas` code. +Checks for `inplace=True` in code using the `pandas` library. ## Why is this bad? - Many people expect `inplace=True` to be a performance benefit that prevents dataframe copies, but that's often not true. From 03e32f0d1f63b864bb53aa4a1352c0f2b4743e82 Mon Sep 17 00:00:00 2001 From: Zeddicus414 Date: Sat, 11 Feb 2023 23:12:49 -0600 Subject: [PATCH 6/8] Wrap lines. Update wording. --- .../ruff/src/rules/pandas_vet/rules/inplace_argument.rs | 9 ++++++--- docs/rules/use-of-inplace-argument.md | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs index 3a7c33aee7eff..44e6edc66a83f 100644 --- a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs +++ b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs @@ -12,9 +12,12 @@ define_violation!( /// Checks for `inplace=True` in code using the `pandas` library. /// /// ## Why is this bad? - /// - Many people expect `inplace=True` to be a performance benefit that prevents dataframe copies, but that's often not true. - /// - It encourages mutation rather than immutable data, which is harder to reason about and may cause bugs. - /// - It removes the ability to use the chaining style for `pandas` code. + /// - `inplace=True` often does not provide a performance benefit. It is + /// likely to copy dataframes in the background. + /// - It encourages mutation rather than immutable data, which is harder to + /// reason about and may cause bugs. + /// - It removes the ability to use the method chaining style for `pandas` + /// code. /// /// ## Example /// ```python diff --git a/docs/rules/use-of-inplace-argument.md b/docs/rules/use-of-inplace-argument.md index 431104b5b8d57..31eb5b7c41ca8 100644 --- a/docs/rules/use-of-inplace-argument.md +++ b/docs/rules/use-of-inplace-argument.md @@ -8,9 +8,12 @@ Autofix is always available. Checks for `inplace=True` in code using the `pandas` library. ## Why is this bad? -- Many people expect `inplace=True` to be a performance benefit that prevents dataframe copies, but that's often not true. -- It encourages mutation rather than immutable data, which is harder to reason about and may cause bugs. -- It removes the ability to use the chaining style for `pandas` code. +- `inplace=True` often does not provide a performance benefit. It is +likely to copy dataframes in the background. +- It encourages mutation rather than immutable data, which is harder to +reason about and may cause bugs. +- It removes the ability to use the method chaining style for `pandas` +code. ## Example ```python From bb65f53a5c0051b37d315486de86efe668555001 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sun, 12 Feb 2023 11:54:23 -0500 Subject: [PATCH 7/8] Tweak docs --- .../rules/pandas_vet/rules/inplace_argument.rs | 15 ++++++++------- docs/rules/use-of-inplace-argument.md | 15 ++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs index 44e6edc66a83f..15c1570dd5358 100644 --- a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs +++ b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs @@ -9,15 +9,16 @@ use crate::violation::AlwaysAutofixableViolation; define_violation!( /// ## What it does - /// Checks for `inplace=True` in code using the `pandas` library. + /// Checks for `inplace=True` usages in `pandas` function and method + /// calls. /// /// ## Why is this bad? - /// - `inplace=True` often does not provide a performance benefit. It is - /// likely to copy dataframes in the background. - /// - It encourages mutation rather than immutable data, which is harder to - /// reason about and may cause bugs. - /// - It removes the ability to use the method chaining style for `pandas` - /// code. + /// Using `inplace=True` encourages mutation rather than immutable data, + /// which is harder to reason about and may cause bugs. It also removes the + /// ability to use the method chaining style for `pandas` operations. + /// + /// Further, in many cases, `inplace=True` does not provide a performance + /// benefit, as Pandas will often copy DataFrames in the background. /// /// ## Example /// ```python diff --git a/docs/rules/use-of-inplace-argument.md b/docs/rules/use-of-inplace-argument.md index 31eb5b7c41ca8..766fad8e03a74 100644 --- a/docs/rules/use-of-inplace-argument.md +++ b/docs/rules/use-of-inplace-argument.md @@ -5,15 +5,16 @@ Derived from the **pandas-vet** linter. Autofix is always available. ## What it does -Checks for `inplace=True` in code using the `pandas` library. +Checks for `inplace=True` usages in `pandas` function and method +calls. ## Why is this bad? -- `inplace=True` often does not provide a performance benefit. It is -likely to copy dataframes in the background. -- It encourages mutation rather than immutable data, which is harder to -reason about and may cause bugs. -- It removes the ability to use the method chaining style for `pandas` -code. +Using `inplace=True` encourages mutation rather than immutable data, +which is harder to reason about and may cause bugs. It also removes the +ability to use the method chaining style for `pandas` operations. + +Further, in many cases, `inplace=True` does not provide a performance +benefit, as Pandas will often copy DataFrames in the background. ## Example ```python From e98e39edf96b9109915de4aba9b5678dc635d046 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sun, 12 Feb 2023 13:08:09 -0500 Subject: [PATCH 8/8] Fix lint --- crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs | 4 ++-- docs/rules/use-of-inplace-argument.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs index 15c1570dd5358..4336ec8e9c33f 100644 --- a/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs +++ b/crates/ruff/src/rules/pandas_vet/rules/inplace_argument.rs @@ -18,7 +18,7 @@ define_violation!( /// ability to use the method chaining style for `pandas` operations. /// /// Further, in many cases, `inplace=True` does not provide a performance - /// benefit, as Pandas will often copy DataFrames in the background. + /// benefit, as `pandas` will often copy `DataFrames` in the background. /// /// ## Example /// ```python @@ -41,7 +41,7 @@ impl AlwaysAutofixableViolation for UseOfInplaceArgument { } fn autofix_title(&self) -> String { - format!("Assign to variable and remove the `inplace` arg") + format!("Assign to variable; remove `inplace` arg") } } diff --git a/docs/rules/use-of-inplace-argument.md b/docs/rules/use-of-inplace-argument.md index 766fad8e03a74..ffad4a6510406 100644 --- a/docs/rules/use-of-inplace-argument.md +++ b/docs/rules/use-of-inplace-argument.md @@ -14,7 +14,7 @@ which is harder to reason about and may cause bugs. It also removes the ability to use the method chaining style for `pandas` operations. Further, in many cases, `inplace=True` does not provide a performance -benefit, as Pandas will often copy DataFrames in the background. +benefit, as `pandas` will often copy `DataFrames` in the background. ## Example ```python