From 50623ac0f4039f2c53cddd294fcadc071f0bd94e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 12 Mar 2021 17:24:26 +0100 Subject: [PATCH 1/8] add support for getproperty broadcasting --- src/other/broadcasting.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index b12f965640..ac61265323 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -121,6 +121,14 @@ end Base.dotview(df::SubDataFrame, ::typeof(!), idxs) = throw(ArgumentError("broadcasting with ! row selector is not allowed for SubDataFrame")) + +if isdefined(Base, :dotgetproperty) + Base.dotgetproperty(df::DataFrame, col::SymbolOrString) = + LazyNewColDataFrame(df, Symbol(col)) + Base.dotgetproperty(df::SubDataFrame, ::SymbolOrString) = + throw(ArgumentError("broadcasting getproperty is not allowed for SubDataFrame")) +end + function Base.copyto!(lazydf::LazyNewColDataFrame, bc::Base.Broadcast.Broadcasted{T}) where T if bc isa Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}} bc_tmp = Base.Broadcast.Broadcasted{T}(bc.f, bc.args, ()) From 79eefb2999940d04636da49f368a6da1e7123552 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 12 Mar 2021 17:33:16 +0100 Subject: [PATCH 2/8] improve the message --- src/other/broadcasting.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index ac61265323..1a68092789 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -125,8 +125,9 @@ Base.dotview(df::SubDataFrame, ::typeof(!), idxs) = if isdefined(Base, :dotgetproperty) Base.dotgetproperty(df::DataFrame, col::SymbolOrString) = LazyNewColDataFrame(df, Symbol(col)) - Base.dotgetproperty(df::SubDataFrame, ::SymbolOrString) = - throw(ArgumentError("broadcasting getproperty is not allowed for SubDataFrame")) + Base.dotgetproperty(df::SubDataFrame, col::SymbolOrString) = + throw(ArgumentError("broadcasting getproperty is not allowed for SubDataFrame since " * + "Julia 1.7. use `df[:, $col] .= ... instead")) end function Base.copyto!(lazydf::LazyNewColDataFrame, bc::Base.Broadcast.Broadcasted{T}) where T From 6ac1a09a7d85ddd91f3557b14eb35eba8cf15694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 12 Mar 2021 20:31:09 +0100 Subject: [PATCH 3/8] update docs and tests --- NEWS.md | 9 +++++++++ docs/src/lib/indexing.md | 4 ++-- src/other/broadcasting.jl | 9 ++++++--- test/broadcasting.jl | 18 ++++++++++++++++++ 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 055e5f6cf3..fa956f2b26 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,9 @@ ## Breaking changes * No breaking changes are planned for v1.0 release +* Since Julia 1.7 broadcasting assignment to an existing column of a `DataFrame` + selected as a property replaces the column (on earlier Julia versions this is + an in-place operation) ## Bug fixes @@ -22,9 +25,15 @@ ([#2496](https://github.com/JuliaData/DataFrames.jl/pull/2496)) * `names` now allows passing a predicate as a column selector ([#2417](https://github.com/JuliaData/DataFrames.jl/pull/2417)) +* since Julia 1.7 using broadcasting assignment on a `DataFrames` column + selected as a property is allowed even when it does not exist in a data frame + ([#2655](https://github.com/JuliaData/DataFrames.jl/pull/2655)) ## Deprecated +* since Julia 1.7 using broadcasting assignment on a `SubDataFrames` column + selected as a property is deprecated + ([#2655](https://github.com/JuliaData/DataFrames.jl/pull/2655)) * all old deprecations now throw an error ([#2554](https://github.com/JuliaData/DataFrames.jl/pull/2554)) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index 7de477d7e6..5d1f2c270a 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -192,10 +192,10 @@ Additional rules: if `col` is `Symbol` or `AbstractString` and it is missing from `df` then a new column is allocated added; the length of the column is always the value of `nrow(df)` before the assignment takes place; * the `df[!, cols] .= v` syntax replaces existing columns `cols` in data frame `df` with freshly allocated vectors; -* `df.col .= v` syntax is allowed and performs in-place assignment to an existing vector `df.col`. +* `df.col .= v` syntax is allowed up to Julia 1.6 and performs in-place assignment to an existing vector `df.col`. Since Julia 1.7 column gets replaced. * in the `sdf[CartesianIndex(row, col)] .= v`, `sdf[row, col] .= v` and `sdf[row, cols] .= v` syntaxes the assignment to `sdf` is performed in-place; * in the `sdf[rows, col] .= v` and `sdf[rows, cols] .= v` syntaxes the assignment to `sdf` is performed in-place; -* `sdf.col .= v` syntax is allowed and performs in-place assignment to an existing vector `sdf.col`. +* `sdf.col .= v` syntax is allowed and up to Julia 1.6 performs an in-place assignment to an existing vector `sdf.col`. Since Julia 1.7 this is deprecated. * `dfr.col .= v` syntax is allowed and performs in-place assignment to a value extracted by `dfr.col`. Note that `sdf[!, col] .= v` and `sdf[!, cols] .= v` syntaxes are not allowed as `sdf` can be only modified in-place. diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 1a68092789..05c3182115 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -125,9 +125,12 @@ Base.dotview(df::SubDataFrame, ::typeof(!), idxs) = if isdefined(Base, :dotgetproperty) Base.dotgetproperty(df::DataFrame, col::SymbolOrString) = LazyNewColDataFrame(df, Symbol(col)) - Base.dotgetproperty(df::SubDataFrame, col::SymbolOrString) = - throw(ArgumentError("broadcasting getproperty is not allowed for SubDataFrame since " * - "Julia 1.7. use `df[:, $col] .= ... instead")) + + function Base.dotgetproperty(df::SubDataFrame, col::SymbolOrString) + Base.depwarn("broadcasting getproperty is deprecated for SubDataFrame " * + "since Julia 1.7. Use `df[:, $col] .= ... instead", :dotgetproperty) + return getproperty(df, col) + end end function Base.copyto!(lazydf::LazyNewColDataFrame, bc::Base.Broadcast.Broadcasted{T}) where T diff --git a/test/broadcasting.jl b/test/broadcasting.jl index b4baf252c8..ad7a6bbf25 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -1842,4 +1842,22 @@ end @test_throws DimensionMismatch df[:, "z"] .= z end +@testset "broadcasting of getproperty" begin + if isdefined(Base, :dotgetproperty) + df = DataFrame(a=1:4) + df.b .= 1 + df.c .= 4:-1:1 + # since Julia 1.7 this replaces column + # on previous Julia versions conversion of Char to Int is performed + df.a .= 'a':'d' + @test df == DataFrame(a='a':'d', b=1, c=4:-1:1) + + # this behavior is deprecated; change this test to an error when + # fully switching to Julia 1.7 support + dfv = view(df, 2:3, 2:3) + dfv.b .= 0 + @test df.b == [1, 0, 0, 1] + end +end + end # module From 0fe53a4d276cd02d0b253d5fdd8d18cb064e9c0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 14 Mar 2021 23:02:16 +0100 Subject: [PATCH 4/8] change implementation --- src/other/broadcasting.jl | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 05c3182115..f4b4e42824 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -122,13 +122,23 @@ Base.dotview(df::SubDataFrame, ::typeof(!), idxs) = throw(ArgumentError("broadcasting with ! row selector is not allowed for SubDataFrame")) +# TODO: remove the deprecations when Julia 1.7 functionality is commonly used +# by the community if isdefined(Base, :dotgetproperty) - Base.dotgetproperty(df::DataFrame, col::SymbolOrString) = - LazyNewColDataFrame(df, Symbol(col)) + function Base.dotgetproperty(df::DataFrame, col::SymbolOrString) + if columnindex(df, col) == 0 + return LazyNewColDataFrame(df, Symbol(col)) + else + Base.depwarn("In the future this operation will allocate a new column" * + "instead of performing an in-place assignment.", :dotgetproperty) + return getproperty(df, col) + end + end function Base.dotgetproperty(df::SubDataFrame, col::SymbolOrString) - Base.depwarn("broadcasting getproperty is deprecated for SubDataFrame " * - "since Julia 1.7. Use `df[:, $col] .= ... instead", :dotgetproperty) + Base.depwarn("broadcasting getproperty is deprecated for SubDataFrame and " * + "will be disallowed in the future. Use `df[:, $col] .= ... instead", + :dotgetproperty) return getproperty(df, col) end end From 6e6540f27d689603810d7c06d48141242e4e4c4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 22 Mar 2021 11:30:24 +0100 Subject: [PATCH 5/8] use the agreed rules --- NEWS.md | 15 +++++++++------ docs/src/lib/indexing.md | 15 ++++++++++----- test/broadcasting.jl | 15 +++++---------- 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/NEWS.md b/NEWS.md index fa956f2b26..dcd226557d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,9 +3,6 @@ ## Breaking changes * No breaking changes are planned for v1.0 release -* Since Julia 1.7 broadcasting assignment to an existing column of a `DataFrame` - selected as a property replaces the column (on earlier Julia versions this is - an in-place operation) ## Bug fixes @@ -26,14 +23,20 @@ * `names` now allows passing a predicate as a column selector ([#2417](https://github.com/JuliaData/DataFrames.jl/pull/2417)) * since Julia 1.7 using broadcasting assignment on a `DataFrames` column - selected as a property is allowed even when it does not exist in a data frame + selected as a property is allowed when it does not exist in a data frame + and it allocates a fresh column ([#2655](https://github.com/JuliaData/DataFrames.jl/pull/2655)) ## Deprecated -* since Julia 1.7 using broadcasting assignment on a `SubDataFrames` column - selected as a property is deprecated +* Using broadcasting assignment on a `SubDataFrames` column selected as a property + is deprecated; it will be disallowed in the future. ([#2655](https://github.com/JuliaData/DataFrames.jl/pull/2655)) +* Broadcasting assignment to an existing column of a `DataFrame` + selected as a property being an in-place operation is deprecated. It will + allocate a fresh column in the future + ([#2655](https://github.com/JuliaData/DataFrames.jl/pull/2655)) + * all old deprecations now throw an error ([#2554](https://github.com/JuliaData/DataFrames.jl/pull/2554)) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index 5d1f2c270a..b1c2f9582e 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -183,19 +183,24 @@ In such an operation `AbstractDataFrame` is considered as two-dimensional and `D `DataFrameRow` is considered to be column-oriented. Additional rules: -* in the `df[CartesianIndex(row, col)] .= v`, `df[row, col] .= v` syntaxes `v` is broadcasted into the contents of `df[row, col]` (this is consistent with Julia Base); +* in the `df[CartesianIndex(row, col)] .= v`, `df[row, col] .= v` syntaxes `v` is + broadcasted into the contents of `df[row, col]` (this is consistent with Julia Base); * in the `df[row, cols] .= v` syntaxes the assignment to `df` is performed in-place; -* in the `df[rows, col] .= v` and `df[rows, cols] .= v` syntaxes the assignment to `df` is performed in-place; - if `rows` is `:` and `col` is `Symbol` or `AbstractString` and it is missing from `df` then a new column is allocated and added; +* in the `df[rows, col] .= v` and `df[rows, cols] .= v` syntaxes the assignment to + `df` is performed in-place; if `rows` is `:` and `col` is `Symbol` or `AbstractString` + and it is missing from `df` then a new column is allocated and added; the length of the column is always the value of `nrow(df)` before the assignment takes place; * in the `df[!, col] .= v` syntax column `col` is replaced by a freshly allocated vector; if `col` is `Symbol` or `AbstractString` and it is missing from `df` then a new column is allocated added; the length of the column is always the value of `nrow(df)` before the assignment takes place; * the `df[!, cols] .= v` syntax replaces existing columns `cols` in data frame `df` with freshly allocated vectors; -* `df.col .= v` syntax is allowed up to Julia 1.6 and performs in-place assignment to an existing vector `df.col`. Since Julia 1.7 column gets replaced. +* `df.col .= v` currently syntax performs in-place assignment to an existing vector `df.col`; + this behavior is deprecated and a freshcolumn will be allocated in the future. + Starting from Julia 1.7 if `:col` is not present in `df` then a new column will be created in `df`. * in the `sdf[CartesianIndex(row, col)] .= v`, `sdf[row, col] .= v` and `sdf[row, cols] .= v` syntaxes the assignment to `sdf` is performed in-place; * in the `sdf[rows, col] .= v` and `sdf[rows, cols] .= v` syntaxes the assignment to `sdf` is performed in-place; -* `sdf.col .= v` syntax is allowed and up to Julia 1.6 performs an in-place assignment to an existing vector `sdf.col`. Since Julia 1.7 this is deprecated. +* `sdf.col .= v` syntax is performs an in-place assignment to an existing vector `sdf.col` and is deprecated; + in the future this operation will not be allowed. * `dfr.col .= v` syntax is allowed and performs in-place assignment to a value extracted by `dfr.col`. Note that `sdf[!, col] .= v` and `sdf[!, cols] .= v` syntaxes are not allowed as `sdf` can be only modified in-place. diff --git a/test/broadcasting.jl b/test/broadcasting.jl index ad7a6bbf25..d35f200840 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -1847,16 +1847,11 @@ end df = DataFrame(a=1:4) df.b .= 1 df.c .= 4:-1:1 - # since Julia 1.7 this replaces column - # on previous Julia versions conversion of Char to Int is performed - df.a .= 'a':'d' - @test df == DataFrame(a='a':'d', b=1, c=4:-1:1) - - # this behavior is deprecated; change this test to an error when - # fully switching to Julia 1.7 support - dfv = view(df, 2:3, 2:3) - dfv.b .= 0 - @test df.b == [1, 0, 0, 1] + # TODO: enable this in the future when the deprecation period is finished + # df.a .= 'a':'d' + # @test df == DataFrame(a='a':'d', b=1, c=4:-1:1) + # dfv = view(df, 2:3, 2:3) + # @test_throws ArgumentError dfv.b .= 0 end end From aa3ae554f295ecda9528c49f8b45e5d45e0a0604 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 22 Mar 2021 11:30:38 +0100 Subject: [PATCH 6/8] more tests --- test/deprecated.jl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/deprecated.jl b/test/deprecated.jl index 4dcfcded0b..6856beda97 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -7,6 +7,15 @@ using Test, DataFrames @test_throws ArgumentError aggregate() end +@testset "deprecated broadcasting assignment" begin + df = DataFrame(a=1:4, b=1, c=2) + df.a .= 'a':'d' + @test df == DataFrame(a=97:100, b=1, c=2) + dfv = view(df, 2:3, 2:3) + dfv.b .= 0 + @test df.b == [1, 0, 0, 1] +end + @testset "All indexing" begin df = DataFrame(a=1, b=2, c=3) From f618b97b9b2a64d48e7788a4ef1d557b62ea92b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 22 Mar 2021 19:04:05 +0100 Subject: [PATCH 7/8] fix test --- test/broadcasting.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/broadcasting.jl b/test/broadcasting.jl index d35f200840..3caae1faa1 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -1458,8 +1458,13 @@ end @test v1 == [100.0, 100.0, 100.0] df = copy(refdf) - @test_throws ArgumentError df.newcol .= 'd' - @test df == refdf + if isdefined(Base, :dotgetproperty) + df.newcol .= 'd' + @test df == [refdf DataFrame(newcol=fill('d', 3))] + else + @test_throws ArgumentError df.newcol .= 'd' + @test df == refdf + end df = view(copy(refdf), :, :) v1 = df[!, 1] From 170a97f14c82059f5d90af04de7074fb3410faeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 24 Mar 2021 11:39:04 +0100 Subject: [PATCH 8/8] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- NEWS.md | 12 ++++++------ docs/src/lib/indexing.md | 4 ++-- src/other/broadcasting.jl | 2 +- test/broadcasting.jl | 1 + 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index 8093178d9b..7ad1644d0c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -26,9 +26,9 @@ additional column to be added in the last position in the resulting data frame that will identify the source data frame. ([#2649](https://github.com/JuliaData/DataFrames.jl/pull/2649)) -* since Julia 1.7 using broadcasting assignment on a `DataFrames` column - selected as a property is allowed when it does not exist in a data frame - and it allocates a fresh column +* since Julia 1.7 using broadcasting assignment on a `DataFrame` column + selected as a property (e.g. `df.col .= 1`) is allowed when column does not + exist and it allocates a fresh column ([#2655](https://github.com/JuliaData/DataFrames.jl/pull/2655)) ## Deprecated @@ -37,11 +37,11 @@ is deprecated in favor of `source` keyword argument; `indicator` will be removed in 2.0 release ([2649](https://github.com/JuliaData/DataFrames.jl/pull/2649)) * Using broadcasting assignment on a `SubDataFrames` column selected as a property - is deprecated; it will be disallowed in the future. + (e.g. `sdf.col .= 1`) is deprecated; it will be disallowed in the future. ([#2655](https://github.com/JuliaData/DataFrames.jl/pull/2655)) * Broadcasting assignment to an existing column of a `DataFrame` - selected as a property being an in-place operation is deprecated. It will - allocate a fresh column in the future + selected as a property (e.g. `df.col .= 1`) being an in-place + operation is deprecated. It will allocate a fresh column in the future ([#2655](https://github.com/JuliaData/DataFrames.jl/pull/2655)) * all deprecations present in 0.22 release now throw an error ([#2554](https://github.com/JuliaData/DataFrames.jl/pull/2554)) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index b1c2f9582e..44869d3c3d 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -194,8 +194,8 @@ Additional rules: if `col` is `Symbol` or `AbstractString` and it is missing from `df` then a new column is allocated added; the length of the column is always the value of `nrow(df)` before the assignment takes place; * the `df[!, cols] .= v` syntax replaces existing columns `cols` in data frame `df` with freshly allocated vectors; -* `df.col .= v` currently syntax performs in-place assignment to an existing vector `df.col`; - this behavior is deprecated and a freshcolumn will be allocated in the future. +* `df.col .= v` syntax currently performs in-place assignment to an existing vector `df.col`; + this behavior is deprecated and a new column will be allocated in the future. Starting from Julia 1.7 if `:col` is not present in `df` then a new column will be created in `df`. * in the `sdf[CartesianIndex(row, col)] .= v`, `sdf[row, col] .= v` and `sdf[row, cols] .= v` syntaxes the assignment to `sdf` is performed in-place; * in the `sdf[rows, col] .= v` and `sdf[rows, cols] .= v` syntaxes the assignment to `sdf` is performed in-place; diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index f4b4e42824..6760925e2f 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -137,7 +137,7 @@ if isdefined(Base, :dotgetproperty) function Base.dotgetproperty(df::SubDataFrame, col::SymbolOrString) Base.depwarn("broadcasting getproperty is deprecated for SubDataFrame and " * - "will be disallowed in the future. Use `df[:, $col] .= ... instead", + "will be disallowed in the future. Use `df[:, $(repr(col))] .= ... instead", :dotgetproperty) return getproperty(df, col) end diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 3caae1faa1..570b2fb892 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -1854,6 +1854,7 @@ end df.c .= 4:-1:1 # TODO: enable this in the future when the deprecation period is finished # df.a .= 'a':'d' + # @test df.a isa Vector{Char} # @test df == DataFrame(a='a':'d', b=1, c=4:-1:1) # dfv = view(df, 2:3, 2:3) # @test_throws ArgumentError dfv.b .= 0