From 81a22dc1f0b4f1216bb09f2b7e468021864f8278 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 26 Jul 2019 18:05:16 +0200 Subject: [PATCH 01/30] part 1: target setindex --- docs/src/lib/indexing.md | 17 +++++++++-------- src/dataframe/dataframe.jl | 29 ++++++++++++++++++++++++----- src/dataframerow/dataframerow.jl | 19 +++++++++++++++++++ src/deprecated.jl | 12 +++--------- 4 files changed, 55 insertions(+), 22 deletions(-) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index b7dab3bd41..5e714ffe83 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -109,15 +109,12 @@ then view points to selected columns by their number at the moment of creation o ## `setindex!` -The following list specifies the **target** behavior of `setindex!` operations depending on argument types. - -In the current release of DataFrames.jl we are in the transition period when an old, undocumented, behavior -of `setindex!` is still supported, but throws deprecation warnings. - -The behavior described below will be fully implemented in the next major release of DataFrames.jl. +The following list specifies the behavior of `setindex!` operations depending on argument types. In particular a description explicitly mentions if the assignment is *in-place*. +Note that `setindex!` operations are not atomic. + `setindex!` on `DataFrame`: * `df[row, col] = v` -> set value of `col` in row `row` to `v` in-place; * `df[CartesianIndex(row, col)] = v` -> the same as `df[row, col] = v`; @@ -150,7 +147,9 @@ Note that `sdf[!, col] = v`, `sdf[!, cols] = v` and `sdf.col = v` are not allowe * `dfr[col] = v` -> set value of `col` in row `row` to `v` in-place; equivalent to `dfr.col = v` if `col` is a valid identifier; * `dfr[cols] = v` -> set values of entries in columns `cols` in `dfr` by elements of `v` in place; - `v` can be an `AbstractVector` or `v` can be a `NamedTuple` or `DataFrameRow` when column names must match; + `v` can be an iterable in which case it must have a number of elements equal to `length(dfr)`, + or `v` can be a `NamedTuple`, `AbstractDict` or `DataFrameRow` when column names must match + (in the case of `NamedTuple` and `DataFrameRow` also the order of column names must match); ## Broadcasting @@ -162,6 +161,8 @@ The following broadcasting rules apply to `AbstractDataFrame` objects: of dimensionality higher than two. * If multiple `AbstractDataFrame` objects take part in broadcasting then they have to have identical column names. +Note that broadcasting assignment operations are not atomic. + Broadcasting `DataFrameRow` is currently not allowed (which is consistent with `NamedTuple`). It is possible to assign a value to `AbstractDataFrame` and `DataFrameRow` objects using the `.=` operator. @@ -177,7 +178,7 @@ Additional rules: * in the `df[row, cols] .= v` syntaxes the assignment to `df` is performed in-place; * in the `df[rows, col] .= v` and `df[rows, cols] .= v` syntaxes the assignment to `df` is performed in-place; * in the `df[!, col] .= v` syntax column `col` is replaced by a freshly allocated vector; if `col` is `Symbol` and it is missing from `df` then a new column is added; the length of the column is always the value of `nrow(df)` before the assignment takes place; -* `df[!, cols] = v` syntax is currently disallowed, but is planned to be supported in the future; +* `df[!, cols] .= v` syntax is currently disallowed, but is planned to be supported in the future; * `df.col .= v` syntax is allowed and performs in-place assignment to an existing vector `df.col`. * in the `sdf[CartesianIndex(row, col)] .= v`, `sdf[row, col] .= v` and `sdf[row, cols] .= v` syntaxes the assignment to `sdf` is performed in-place; * in the `sdf[rows, col] .= v` and `sdf[rows, cols] .= v` syntaxes the assignment to `sdf` is performed in-place; diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 0286b7cf42..a7d3e3443b 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -498,10 +498,29 @@ function Base.setindex!(df::DataFrame, v::Any, row_ind::Integer, col_ind::Column return df end +# df[SingleRowIndex, MultiColumnIndex] = value +# the method for value of type DataFrameRow, AbstractDict and NamedTuple +# is defined in dataframerow.jl + +function Base.setindex!(df::DataFrame, + v, + row_ind::Integer, + col_inds::Union{AbstractVector, Regex, Not, Colon}) + idxs = index(df)[col_inds] + if length(v) != length(idxs) + throw(DimensionMismatch("$(length(idxs)) columns were selected and the assigned" * + " value contains $(length(v)) elements") + end + for (i, x) in enumerate(v) + df[row_ind, i] = x + end + return df +end + # df[MultiRowIndex, SingleColumnIndex] = AbstractVector function Base.setindex!(df::DataFrame, v::AbstractVector, - row_inds::Union{AbstractVector, Not}, # add Colon after deprecation + row_inds::Union{AbstractVector, Not, Colon}, col_ind::ColumnIndex) x = df[!, col_ind] try @@ -517,8 +536,8 @@ end # df[MultiRowIndex, MultiColumnIndex] = AbstractDataFrame function Base.setindex!(df::DataFrame, new_df::AbstractDataFrame, - row_inds::Union{AbstractVector, Not}, # add Colon after deprecation - col_inds::Union{AbstractVector, Regex, Not}) # add Colon after deprecation + row_inds::Union{AbstractVector, Not, Colon}, + col_inds::Union{AbstractVector, Regex, Not, Colon}) idxs = index(df)[col_inds] if view(_names(df), idxs) != _names(new_df) Base.depwarn("in the future column names in source and target will have to match", :setindex!) @@ -532,8 +551,8 @@ end # df[MultiRowIndex, MultiColumnIndex] = AbstractMatrix function Base.setindex!(df::DataFrame, mx::AbstractMatrix, - row_inds::Union{AbstractVector, Not}, # add Colon after deprecation - col_inds::Union{AbstractVector, Regex, Not}) # add Colon after deprecation + row_inds::Union{AbstractVector, Not, Colon}, + col_inds::Union{AbstractVector, Regex, Not, Colon}) idxs = index(df)[col_inds] if size(mx, 2) != length(idxs) throw(DimensionMismatch("number of selected columns ($(length(idxs))) and number of columns in" * diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl index b553fcd170..751a2c6d89 100644 --- a/src/dataframerow/dataframerow.jl +++ b/src/dataframerow/dataframerow.jl @@ -98,6 +98,25 @@ Base.@propagate_inbounds Base.getindex(r::DataFrameRow, idxs::Union{AbstractVect DataFrameRow(parent(r), row(r), parentcols(index(r), idxs)) Base.@propagate_inbounds Base.getindex(r::DataFrameRow, ::Colon) = r +function Base.setindex!(df::DataFrame, + v::Union{DataFrameRow, NamedTuple, AbstractDict}, + row_ind::Integer, + col_inds::Union{AbstractVector, Regex, Not, Colon}) + idxs = index(df)[col_inds] + if length(v) != length(idxs) + throw(DimensionMismatch("$(length(idxs)) columns were selected and the assigned" * + " value contains $(length(v)) elements") + end + + if !(v isa AbstractDict || all(((a, b),) -> a == b, zip(view(_names(df), idxs), keys(v)))) + throw(ArgumentError("Selected column names do not match the names in assigned value")) + end + for (col, val) in pairs(v) + df[row_ind, col] = val + end + return df +end + Base.@propagate_inbounds function Base.setindex!(r::DataFrameRow, value::Any, idx) col = parentcols(index(r), idx) if !(col isa Int) diff --git a/src/deprecated.jl b/src/deprecated.jl index 0804616770..50c82ecba0 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1351,9 +1351,6 @@ import Base: delete!, insert!, merge! @deprecate insert!(df::DataFrame, col_ind::Int, item, name::Symbol; makeunique::Bool=false) insertcols!(df, col_ind, name => item; makeunique=makeunique) @deprecate merge!(df1::DataFrame, df2::AbstractDataFrame) (foreach(col -> df1[!, col] = df2[!, col], names(df2)); df1) -import Base: setindex! -@deprecate setindex!(df::DataFrame, x::Nothing, col_ind::Int) select!(df, Not(col_ind)) - import Base: map @deprecate map(f::Function, sdf::SubDataFrame) f(sdf) @deprecate map(f::Union{Function,Type}, dfc::DataFrameColumns{<:AbstractDataFrame, Pair{Symbol, AbstractVector}}) mapcols(f, dfc.df) @@ -1406,6 +1403,7 @@ import Base: view @deprecate view(adf::AbstractDataFrame, colinds) view(adf, :, colinds) import Base: setindex! +@deprecate setindex!(df::DataFrame, x::Nothing, col_ind::Int) select!(df, Not(col_ind)) @deprecate setindex!(sdf::SubDataFrame, val::Any, colinds::Any) (sdf[:, colinds] = val; sdf) @deprecate setindex!(df::DataFrame, v::AbstractVector, col_ind::ColumnIndex) (df[!, col_ind] = v; df) @@ -1473,10 +1471,6 @@ end @deprecate setindex!(df::DataFrame, v::Any, row_ind::Integer, col_inds::AbstractVector{<:ColumnIndex}) (df[row_ind, col_inds] .= Ref(v); df) -# df[:, SingleColumnIndex] = AbstractVector -@deprecate setindex!(df::DataFrame, v::AbstractVector, ::Colon, - col_ind::ColumnIndex) (df[!, col_ind] = v; df) - # df[MultiRowIndex, SingleColumnIndex] = Single Item function Base.setindex!(df::DataFrame, v::Any, @@ -1584,7 +1578,7 @@ import Base: setproperty! @deprecate setproperty!(df::DataFrame, col_ind::Symbol, v) (df[!, col_ind] .= v) @deprecate setproperty!(df::SubDataFrame, col_ind::Symbol, v) (df[:, col_ind] .= v) -# There methods duplicate functionality but are needed to resolve method call ambiuguities +##### BEGIN: There methods duplicate functionality but are needed to resolve method call ambiuguities function Base.setindex!(df::DataFrame, new_df::AbstractDataFrame, @@ -1672,4 +1666,4 @@ function Base.setindex!(df::DataFrame, return df end -####### END: duplicate methods +##### END: duplicate methods From 5acea57e0290895a76b3ca7348129a4680501f76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 4 Aug 2019 17:40:52 +0200 Subject: [PATCH 02/30] Apply suggestions from code review Co-Authored-By: Milan Bouchet-Valat --- docs/src/lib/indexing.md | 2 +- src/dataframerow/dataframerow.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index 5e714ffe83..85dcb220ed 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -178,7 +178,7 @@ Additional rules: * in the `df[row, cols] .= v` syntaxes the assignment to `df` is performed in-place; * in the `df[rows, col] .= v` and `df[rows, cols] .= v` syntaxes the assignment to `df` is performed in-place; * in the `df[!, col] .= v` syntax column `col` is replaced by a freshly allocated vector; if `col` is `Symbol` and it is missing from `df` then a new column is added; the length of the column is always the value of `nrow(df)` before the assignment takes place; -* `df[!, cols] .= v` syntax is currently disallowed, but is planned to be supported in the future; +* the `df[!, cols] .= v` syntax is currently disallowed, but is planned to be supported in the future; * `df.col .= v` syntax is allowed and performs in-place assignment to an existing vector `df.col`. * in the `sdf[CartesianIndex(row, col)] .= v`, `sdf[row, col] .= v` and `sdf[row, cols] .= v` syntaxes the assignment to `sdf` is performed in-place; * in the `sdf[rows, col] .= v` and `sdf[rows, cols] .= v` syntaxes the assignment to `sdf` is performed in-place; diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl index 751a2c6d89..cdf2962fb2 100644 --- a/src/dataframerow/dataframerow.jl +++ b/src/dataframerow/dataframerow.jl @@ -104,7 +104,7 @@ function Base.setindex!(df::DataFrame, col_inds::Union{AbstractVector, Regex, Not, Colon}) idxs = index(df)[col_inds] if length(v) != length(idxs) - throw(DimensionMismatch("$(length(idxs)) columns were selected and the assigned" * + throw(DimensionMismatch("$(length(idxs)) columns were selected but the assigned" * " value contains $(length(v)) elements") end From bcad2e6db524e5f624e89680f6b1026e12598ddd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 4 Aug 2019 18:07:37 +0200 Subject: [PATCH 03/30] fixes after the review --- docs/src/lib/indexing.md | 13 ++++++++----- src/dataframerow/dataframerow.jl | 13 +++++++++++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index 85dcb220ed..cb0a693b6a 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -113,7 +113,8 @@ The following list specifies the behavior of `setindex!` operations depending on In particular a description explicitly mentions if the assignment is *in-place*. -Note that `setindex!` operations are not atomic. +Note that if `setindex!` operation throws an error the target data frame may be partially changed +so it is unsafe to use it afterwards. `setindex!` on `DataFrame`: * `df[row, col] = v` -> set value of `col` in row `row` to `v` in-place; @@ -147,9 +148,9 @@ Note that `sdf[!, col] = v`, `sdf[!, cols] = v` and `sdf.col = v` are not allowe * `dfr[col] = v` -> set value of `col` in row `row` to `v` in-place; equivalent to `dfr.col = v` if `col` is a valid identifier; * `dfr[cols] = v` -> set values of entries in columns `cols` in `dfr` by elements of `v` in place; - `v` can be an iterable in which case it must have a number of elements equal to `length(dfr)`, - or `v` can be a `NamedTuple`, `AbstractDict` or `DataFrameRow` when column names must match - (in the case of `NamedTuple` and `DataFrameRow` also the order of column names must match); + `v` can be: 1) an iterable in which case it must have a number of elements equal to `length(dfr)`, + 2) an `AbstractDict`, when column names must match, + 3) a `NamedTuple` or `DataFrameRow` when column names and order must match; ## Broadcasting @@ -161,7 +162,9 @@ The following broadcasting rules apply to `AbstractDataFrame` objects: of dimensionality higher than two. * If multiple `AbstractDataFrame` objects take part in broadcasting then they have to have identical column names. -Note that broadcasting assignment operations are not atomic. +Note that if broadcasting assignment operation throws an error the target data frame may be partially changed +so it is unsafe to use it afterwards. + Broadcasting `DataFrameRow` is currently not allowed (which is consistent with `NamedTuple`). diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl index cdf2962fb2..4b85afa18f 100644 --- a/src/dataframerow/dataframerow.jl +++ b/src/dataframerow/dataframerow.jl @@ -105,11 +105,20 @@ function Base.setindex!(df::DataFrame, idxs = index(df)[col_inds] if length(v) != length(idxs) throw(DimensionMismatch("$(length(idxs)) columns were selected but the assigned" * - " value contains $(length(v)) elements") + " value contains $(length(v)) elements")) end if !(v isa AbstractDict || all(((a, b),) -> a == b, zip(view(_names(df), idxs), keys(v)))) - throw(ArgumentError("Selected column names do not match the names in assigned value")) + mismatched = findall(view(_names(df), idxs) .!= collect(keys(v))) + throw(ArgumentError("Selected column names do not match the names in assigned value in" * + " positions $(join(mismatched, ", ", " and "))")) + end + if v isa AbstractDict + for n in view(_names(df), idxs) + if !haskey(v, n) + throw(ArgumentError("Column $n not found in source dictionary")) + end + end end for (col, val) in pairs(v) df[row_ind, col] = val From 5a1f12b7291c8d7f0f5d5166e17e8bd3dc018de6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 13 Aug 2019 13:56:22 +0200 Subject: [PATCH 04/30] adding functionality part 2 --- docs/src/lib/indexing.md | 6 ++-- src/dataframe/dataframe.jl | 32 +++++++++++++++++++- src/dataframerow/dataframerow.jl | 2 +- src/other/broadcasting.jl | 52 +++++++++++++------------------- 4 files changed, 57 insertions(+), 35 deletions(-) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index 64f4c0969f..d4e71bef09 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -129,7 +129,9 @@ so it is unsafe to use it afterwards. also if `col` is a `Symbol` that is not present in `df` then a new column in `df` is created and holds `v`; equivalent to `df.col = v` if `col` is a valid identifier; this is allowed if `ncol(df) == 0 || length(v) == nrow(df)`; -* `df[!, cols] = v` -> is currently disallowed, but is planned to be supported in the future; +* `df[!, cols] = v` -> replaces existing columns `cols` in data frame `df` with copying; + `v` must be an `AbstractMatrix` or an `AbstractDataFrame` + (in this case column names must match); Note that only `df[!, col] = v` and `df.col = v` can be used to add a new column to a `DataFrame`. In particular as `df[:, col] = v` is an in-place operation it does not add a column `v` to a `DataFrame` if `col` is missing @@ -182,7 +184,7 @@ Additional rules: * in the `df[row, cols] .= v` syntaxes the assignment to `df` is performed in-place; * in the `df[rows, col] .= v` and `df[rows, cols] .= v` syntaxes the assignment to `df` is performed in-place; * in the `df[!, col] .= v` syntax column `col` is replaced by a freshly allocated vector; if `col` is `Symbol` and it is missing from `df` then a new column is added; the length of the column is always the value of `nrow(df)` before the assignment takes place; -* the `df[!, cols] .= v` syntax is currently disallowed, but is planned to be supported in the future; +* the `df[!, cols] .= v` replaces existing columns `cols` in data frame `df` with freshly allocated vectors; * `df.col .= v` syntax is allowed and performs in-place assignment to an existing vector `df.col`. * in the `sdf[CartesianIndex(row, col)] .= v`, `sdf[row, col] .= v` and `sdf[row, cols] .= v` syntaxes the assignment to `sdf` is performed in-place; * in the `sdf[rows, col] .= v` and `sdf[rows, cols] .= v` syntaxes the assignment to `sdf` is performed in-place; diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 753d21f456..193a601bb2 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -505,7 +505,7 @@ end function Base.setindex!(df::DataFrame, v, row_ind::Integer, - col_inds::Union{AbstractVector, Regex, Not, Colon}) + col_inds::Union{AbstractVector, Regex, Not, Between, All, Colon}) idxs = index(df)[col_inds] if length(v) != length(idxs) throw(DimensionMismatch("$(length(idxs)) columns were selected and the assigned" * @@ -548,6 +548,21 @@ function Base.setindex!(df::DataFrame, return df end +function Base.setindex!(df::DataFrame, + new_df::AbstractDataFrame, + row_inds::typeof(!), + col_inds::Union{AbstractVector, Regex, Not, Between, All, Colon}) + idxs = index(df)[col_inds] + if view(_names(df), idxs) != _names(new_df) + throw(ArgumentError("Column names in source and target data frames do not match")) + end + for (j, col) in enumerate(idxs) + # make sure we make a copy on assignment + df[!, col] = new_df[:, j] + end + return df +end + # df[MultiRowIndex, MultiColumnIndex] = AbstractMatrix function Base.setindex!(df::DataFrame, mx::AbstractMatrix, @@ -564,6 +579,21 @@ function Base.setindex!(df::DataFrame, return df end +function Base.setindex!(df::DataFrame, + mx::AbstractMatrix, + row_inds::typeof(!), + col_inds::Union{AbstractVector, Regex, Not, Between, All, Colon}) + idxs = index(df)[col_inds] + if size(mx, 2) != length(idxs) + throw(DimensionMismatch("number of selected columns ($(length(idxs))) and number of columns in" * + " matrix ($(size(mx, 2))) do not match")) + end + for (j, col) in enumerate(idxs) + df[!, col] = mx[:, j] + end + return df +end + ############################################################################## ## ## Mutating methods diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl index cbb322aea6..d128b52b79 100644 --- a/src/dataframerow/dataframerow.jl +++ b/src/dataframerow/dataframerow.jl @@ -101,7 +101,7 @@ Base.@propagate_inbounds Base.getindex(r::DataFrameRow, ::Colon) = r function Base.setindex!(df::DataFrame, v::Union{DataFrameRow, NamedTuple, AbstractDict}, row_ind::Integer, - col_inds::Union{AbstractVector, Regex, Not, Colon}) + col_inds::Union{AbstractVector, Regex, Not, Between, All, Colon}) idxs = index(df)[col_inds] if length(v) != length(idxs) throw(DimensionMismatch("$(length(idxs)) columns were selected but the assigned" * diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 2ba607a818..4d9fbcba84 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -80,14 +80,12 @@ end Base.axes(x::LazyNewColDataFrame) = (Base.OneTo(nrow(x.df)),) -# ColReplaceDataFrame is reserved for future extensions if we decide to allow df[!, cols] .= v -# # ColReplaceDataFrame allows for column replacement in broadcasting -# struct ColReplaceDataFrame -# df::DataFrame -# cols::Vector{Int} -# end +struct ColReplaceDataFrame + df::DataFrame + cols::Vector{Int} +end -# Base.axes(x::ColReplaceDataFrame) = axes(x.df) +Base.axes(x::ColReplaceDataFrame) = (axes(x.df, 1), Base.OneTo(length(x.cols))) Base.maybeview(df::AbstractDataFrame, idx::CartesianIndex{2}) = df[idx] Base.maybeview(df::AbstractDataFrame, row::Integer, col::ColumnIndex) = df[row, col] @@ -95,13 +93,11 @@ Base.maybeview(df::AbstractDataFrame, rows, cols) = view(df, rows, cols) function Base.maybeview(df::DataFrame, ::typeof(!), cols) if !(cols isa ColumnIndex) - throw(ArgumentError("broadcasting with column replacement is currently allowed only for single column index")) + return ColReplaceDataFrame(df, index(df)[cols]) end if !(cols isa Symbol) && cols > ncol(df) throw(ArgumentError("creating new columns using an integer index by broadcasting is disallowed")) end - # in the future we might allow cols to target multiple columns - # in which case ColReplaceDataFrame(df, index(df)[cols]) will be returned LazyNewColDataFrame(df, cols) end @@ -229,28 +225,22 @@ function Base.copyto!(df::AbstractDataFrame, bc::Base.Broadcast.Broadcasted{<:Ba end end -# copyto! for ColReplaceDataFrame is reserved for future extensions if we decide to allow df[!, cols] .= v -# function Base.copyto!(df::ColReplaceDataFrame, bc::Base.Broadcast.Broadcasted) -# bcf = Base.Broadcast.flatten(bc) -# colnames = unique([_names(x) for x in bcf.args if x isa AbstractDataFrame]) -# if length(colnames) > 1 || (length(colnames) == 1 && view(_names(df.df), df.cols) != colnames[1]) -# wrongnames = setdiff(union(colnames...), intersect(colnames...)) -# msg = join(wrongnames, ", ", " and ") -# throw(ArgumentError("Column names in broadcasted data frames must match. " * -# "Non matching column names are $msg")) -# end +function Base.copyto!(crdf::ColReplaceDataFrame, bc::Base.Broadcast.Broadcasted) + bcf = Base.Broadcast.flatten(bc) + colnames = unique([_names(x) for x in bcf.args if x isa AbstractDataFrame]) + if length(colnames) > 1 || (length(colnames) == 1 && view(_names(crdf.df), crdf.cols) != colnames[1]) + wrongnames = setdiff(union(colnames...), intersect(colnames...)) + msg = join(wrongnames, ", ", " and ") + throw(ArgumentError("Column names in broadcasted data frames must match. " * + "Non matching column names are $msg")) + end -# nrows = length(axes(bcf)[1]) -# for (i, colidx) in enumerate(df.cols) -# bcf′ = getcolbc(bcf, i) -# v1 = bcf′[CartesianIndex(1, i)] -# startcol = similar(Vector{typeof(v1)}, nrows) -# startcol[1] = v1 -# col = copyto_widen!(startcol, bcf′, 2, i) -# df.df[!, colidx] = col -# end -# df.df -# end + bcf′ = Base.Broadcast.preprocess(crdf, bcf) + for (i, col) in enumerate(crdf.cols) + crdf.df[!, col] = [bcf'[CartesianIndex(j, i)] for j in axes(crdf, 1)] + end + crdf.df +end Base.Broadcast.broadcast_unalias(dest::DataFrameRow, src) = Base.Broadcast.broadcast_unalias(parent(dest), src) From db285741cbd7327a9265b40f269b4333597e5969 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 13 Aug 2019 23:18:14 +0200 Subject: [PATCH 05/30] improve placement of deprecations --- src/dataframe/dataframe.jl | 2 +- src/deprecated.jl | 32 ++++++++++++++++---------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 193a601bb2..6e38419156 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -509,7 +509,7 @@ function Base.setindex!(df::DataFrame, idxs = index(df)[col_inds] if length(v) != length(idxs) throw(DimensionMismatch("$(length(idxs)) columns were selected and the assigned" * - " value contains $(length(v)) elements") + " value contains $(length(v)) elements")) end for (i, x) in enumerate(v) df[row_ind, i] = x diff --git a/src/deprecated.jl b/src/deprecated.jl index ca0ef51a4d..2c385c815e 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1410,18 +1410,18 @@ import Base: setindex! # df[SingleColumnIndex] = Single Item (EXPANDS TO NROW(df) if NCOL(df) > 0) function Base.setindex!(df::DataFrame, v, col_ind::ColumnIndex) if haskey(index(df), col_ind) + df[:, col_ind] .= v Base.depwarn("Implicit broadcasting to an existing column in DataFrame assignment is deprecated." * "Use an explicit broadcast with `df[:, col_ind] .= v`", :setindex!) - df[:, col_ind] .= v else if ncol(df) == 0 + df[!, col_ind] = [v] Base.depwarn("Implicit broadcasting to a new column in DataFrame assignment is deprecated." * "Use `df[!, col_ind] = [v]` when `df` has zero columns", :setindex!) - df[!, col_ind] = [v] else + df[!, col_ind] .= v Base.depwarn("Implicit broadcasting to a new column in DataFrame assignment is deprecated." * "Use `df[!, col_ind] .= v` when `df` has some columns", :setindex!) - df[!, col_ind] .= v end end return df @@ -1447,19 +1447,19 @@ function Base.setindex!(df::DataFrame, setindex!(df, val, findall(col_inds)) end function Base.setindex!(df::DataFrame, val::Any, col_inds::AbstractVector{<:ColumnIndex}) - Base.depwarn("implicit broadcasting in setindex! is deprecated; " * - "use `df[:, col_inds] .= Ref(v)` broadcasting assignment to change the columns in place", :setindex!) for col_ind in col_inds df[col_ind] = val end + Base.depwarn("implicit broadcasting in setindex! is deprecated; " * + "use `df[:, col_inds] .= Ref(v)` broadcasting assignment to change the columns in place", :setindex!) return df end # df[:] = AbstractVector or Single Item function Base.setindex!(df::DataFrame, v, ::Colon) + df[1:size(df, 2)] = v Base.depwarn("`df[:] = v` syntax is deprecated; " * "use `df[:, :] .= Ref(v)` broadcasting assignment to change the columns in place", :setindex!) - df[1:size(df, 2)] = v df end @@ -1484,9 +1484,9 @@ function Base.setindex!(df::DataFrame, v::Any, row_inds::AbstractVector{<:Integer}, col_ind::ColumnIndex) + insert_multiple_entries!(df, v, row_inds, col_ind) Base.depwarn("implicit broadcasting in setindex! is deprecated; " * "use `df[row_inds, col_ind] .= Ref(v)` broadcasting assignment to change the column in place", :setindex!) - insert_multiple_entries!(df, v, row_inds, col_ind) return df end @@ -1513,11 +1513,11 @@ function Base.setindex!(df::DataFrame, v::AbstractVector, row_inds::AbstractVector{<:Integer}, col_inds::AbstractVector{<:ColumnIndex}) - Base.depwarn("implicit vector broadcasting in setindex! is deprecated; " * - "use `df[row_inds, col_inds] .= v` broadcasting assignment to change the columns in place", :setindex!) for col_ind in col_inds insert_multiple_entries!(df, v, row_inds, col_ind) end + Base.depwarn("implicit vector broadcasting in setindex! is deprecated; " * + "use `df[row_inds, col_inds] .= v` broadcasting assignment to change the columns in place", :setindex!) return df end @@ -1544,35 +1544,35 @@ function Base.setindex!(df::DataFrame, v::Any, row_inds::AbstractVector{<:Integer}, col_inds::AbstractVector{<:ColumnIndex}) - Base.depwarn("implicit vector broadcasting in setindex! is deprecated; " * - "use `df[row_inds, col_inds] .= Ref(v)` broadcasting assignment to change the columns in place", :setindex!) for col_ind in col_inds insert_multiple_entries!(df, v, row_inds, col_ind) end + Base.depwarn("implicit vector broadcasting in setindex! is deprecated; " * + "use `df[row_inds, col_inds] .= Ref(v)` broadcasting assignment to change the columns in place", :setindex!) return df end # df[:, :] = ... function Base.setindex!(df::DataFrame, v, ::Colon, ::Colon) + df[1:size(df, 1), 1:size(df, 2)] = v Base.depwarn("implicit vector broadcasting in setindex! is deprecated; " * "use `df[:, col_inds] .= Ref(v)` broadcasting assignment to change the columns in place", :setindex!) - df[1:size(df, 1), 1:size(df, 2)] = v df end # df[Any, :] = ... function Base.setindex!(df::DataFrame, v, row_inds, ::Colon) + df[row_inds, 1:size(df, 2)] = v Base.depwarn("implicit vector broadcasting in setindex! is deprecated; " * "use `df[row_inds, col_inds] .= Ref(v)` broadcasting assignment", :setindex!) - df[row_inds, 1:size(df, 2)] = v df end # df[:, Any] = ... function Base.setindex!(df::DataFrame, v, ::Colon, col_inds) + df[col_inds] = v Base.depwarn("implicit vector broadcasting in setindex! is deprecated; " * "use `df[:, col_inds] .= Ref(v)` broadcasting assignment to change the columns in place", :setindex!) - df[col_inds] = v df end @@ -1648,8 +1648,8 @@ function Base.setindex!(df::DataFrame, x = df[!, col_ind] x[row_inds] = v else - Base.depwarn("implicit vector broadcasting in setindex! is deprecated", :setindex!) insert_multiple_entries!(df, v, row_inds, col_ind) + Base.depwarn("implicit vector broadcasting in setindex! is deprecated", :setindex!) end return df end @@ -1662,8 +1662,8 @@ function Base.setindex!(df::DataFrame, x = df[!, col_ind] x[row_inds] = v else - Base.depwarn("implicit vector broadcasting in setindex! is deprecated", :setindex!) insert_multiple_entries!(df, v, row_inds, col_ind) + Base.depwarn("implicit vector broadcasting in setindex! is deprecated", :setindex!) end return df end From 6c3de3d28ae2d6fba282efe4184fafe485268cc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 14 Aug 2019 00:04:05 +0200 Subject: [PATCH 06/30] start rewriting tests; add Logging --- Project.toml | 3 +- src/other/broadcasting.jl | 2 +- test/broadcasting.jl | 6 +- test/dataframe.jl | 150 +++++++++++++++++++++++++++----------- 4 files changed, 113 insertions(+), 48 deletions(-) diff --git a/Project.toml b/Project.toml index 016d5e7e0d..e04192a52b 100644 --- a/Project.toml +++ b/Project.toml @@ -24,11 +24,12 @@ DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" DataValues = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" +Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["DataStructures", "DataValues", "Dates", "LaTeXStrings", "Random", "Test"] +test = ["DataStructures", "DataValues", "Dates", "LaTeXStrings", "Logging", Random", "Test"] [compat] julia = "1" diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 4d9fbcba84..db13090514 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -237,7 +237,7 @@ function Base.copyto!(crdf::ColReplaceDataFrame, bc::Base.Broadcast.Broadcasted) bcf′ = Base.Broadcast.preprocess(crdf, bcf) for (i, col) in enumerate(crdf.cols) - crdf.df[!, col] = [bcf'[CartesianIndex(j, i)] for j in axes(crdf, 1)] + crdf.df[!, col] = [bcf′[CartesianIndex(j, i)] for j in axes(crdf.df, 1)] end crdf.df end diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 6df601e8c0..7e38d7f44d 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -1152,8 +1152,10 @@ end @test df == refdf df = copy(refdf) - @test_throws ArgumentError df[!, 1:2] .= 'a' - @test df == refdf + df[!, 1:2] .= 'a' + @test Matrix(df) == ['a' 'a' 7.5 10.5 13.5 + 'a' 'a' 8.5 11.5 14.5 + 'a' 'a' 9.5 12.5 15.5] df = copy(refdf) v1 = df[!, 1] diff --git a/test/dataframe.jl b/test/dataframe.jl index 4912e2ea1e..1fdd3d6200 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -1,6 +1,6 @@ module TestDataFrame -using Dates, DataFrames, Statistics, Random, Test +using Dates, DataFrames, Statistics, Random, Test, Logging using DataFrames: _columns const ≅ = isequal const ≇ = !isequal @@ -144,78 +144,105 @@ end end @testset "push!(df, row)" begin - df=DataFrame( first=[1,2,3], second=["apple","orange","pear"] ) + buf = IOBuffer() + sl = SimpleLogger(buf) - dfb= DataFrame( first=[1,2], second=["apple","orange"] ) - dfc= DataFrame( first=[1,2], second=["apple","orange"] ) + df=DataFrame(first=[1,2,3], second=["apple","orange","pear"]) + + dfb= DataFrame(first=[1,2], second=["apple","orange"]) + dfc= DataFrame(first=[1,2], second=["apple","orange"]) push!(dfb, Any[3,"pear"]) @test df == dfb - dfb= DataFrame( first=[1,2], second=["apple","orange"] ) + dfb= DataFrame(first=[1,2], second=["apple","orange"]) push!(dfb, (3,"pear")) @test df == dfb - dfb= DataFrame( first=[1,2], second=["apple","orange"] ) - @test_throws InexactError push!(dfb, (33.33,"pear")) + dfb= DataFrame(first=[1,2], second=["apple","orange"]) + with_logger(sl) do + @test_throws InexactError push!(dfb, (33.33,"pear")) + end @test dfc == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) - dfb= DataFrame( first=[1,2], second=["apple","orange"] ) + dfb= DataFrame(first=[1,2], second=["apple","orange"]) @test_throws ArgumentError push!(dfb, (1,"2",3)) @test dfc == dfb - dfb= DataFrame( first=[1,2], second=["apple","orange"] ) - @test_throws MethodError push!(dfb, ("coconut",22)) + dfb= DataFrame(first=[1,2], second=["apple","orange"]) + with_logger(sl) do + @test_throws MethodError push!(dfb, ("coconut",22)) + end @test dfc == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) - dfb= DataFrame( first=[1,2], second=["apple","orange"] ) - @test_throws MethodError push!(dfb, (11,22)) + dfb= DataFrame(first=[1,2], second=["apple","orange"]) + with_logger(sl) do + @test_throws MethodError push!(dfb, (11,22)) + end @test dfc == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) - dfb= DataFrame( first=[1,2], second=["apple","orange"] ) + dfb= DataFrame(first=[1,2], second=["apple","orange"]) push!(dfb, Dict(:first=>3, :second=>"pear")) @test df == dfb - df=DataFrame( first=[1,2,3], second=["apple","orange","banana"] ) - dfb= DataFrame( first=[1,2], second=["apple","orange"] ) + df=DataFrame(first=[1,2,3], second=["apple","orange","banana"]) + dfb= DataFrame(first=[1,2], second=["apple","orange"]) push!(dfb, Dict(:first=>3, :second=>"banana")) @test df == dfb - dfb= DataFrame( first=[1,2], second=["apple","orange"] ) + dfb= DataFrame(first=[1,2], second=["apple","orange"]) push!(dfb, (first=3, second="banana")) @test df == dfb - dfb= DataFrame( first=[1,2], second=["apple","orange"] ) + dfb= DataFrame(first=[1,2], second=["apple","orange"]) push!(dfb, (second="banana", first=3)) @test df == dfb - df0= DataFrame( first=[1,2], second=["apple","orange"] ) - dfb= DataFrame( first=[1,2], second=["apple","orange"] ) - @test_throws MethodError push!(dfb, (second=3, first=3)) + df0= DataFrame(first=[1,2], second=["apple","orange"]) + dfb= DataFrame(first=[1,2], second=["apple","orange"]) + with_logger(sl) do + @test_throws MethodError push!(dfb, (second=3, first=3)) + end @test df0 == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) - dfb= DataFrame( first=[1,2], second=["apple","orange"] ) + dfb= DataFrame(first=[1,2], second=["apple","orange"]) push!(dfb, (second="banana", first=3)) @test df == dfb - df0= DataFrame( first=[1,2], second=["apple","orange"] ) - dfb= DataFrame( first=[1,2], second=["apple","orange"] ) - @test_throws MethodError push!(dfb, Dict(:first=>true, :second=>false)) + df0= DataFrame(first=[1,2], second=["apple","orange"]) + dfb= DataFrame(first=[1,2], second=["apple","orange"]) + with_logger(sl) do + @test_throws MethodError push!(dfb, Dict(:first=>true, :second=>false)) + end @test df0 == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) - df0= DataFrame( first=[1,2], second=["apple","orange"] ) - dfb= DataFrame( first=[1,2], second=["apple","orange"] ) - @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>"stuff")) + df0= DataFrame(first=[1,2], second=["apple","orange"]) + dfb= DataFrame(first=[1,2], second=["apple","orange"]) + with_logger(sl) do + @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>"stuff")) + end @test df0 == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) - df0=DataFrame( first=[1,2,3], second=["apple","orange","pear"] ) - dfb=DataFrame( first=[1,2,3], second=["apple","orange","pear"] ) - @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>1)) + df0=DataFrame(first=[1,2,3], second=["apple","orange","pear"]) + dfb=DataFrame(first=[1,2,3], second=["apple","orange","pear"]) + with_logger(sl) do + @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>1)) + end @test df0 == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) - df0=DataFrame( first=["1","2","3"], second=["apple","orange","pear"] ) - dfb=DataFrame( first=["1","2","3"], second=["apple","orange","pear"] ) - @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>1)) + df0=DataFrame(first=["1","2","3"], second=["apple","orange","pear"]) + dfb=DataFrame(first=["1","2","3"], second=["apple","orange","pear"]) + with_logger(sl) do + @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>1)) + end @test df0 == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) df = DataFrame(x=1) push!(df, Dict(:x=>2), Dict(:x=>3)) @@ -226,8 +253,11 @@ end @test df[!, :x] == [1, 3, 5] && df[!, :y] == [2, 4, 6] df = DataFrame(x=1, y=2) - @test_throws KeyError push!(df, Dict(:x=>1, "y"=>2)) + with_logger(sl) do + @test_throws KeyError push!(df, Dict(:x=>1, "y"=>2)) + end @test df == DataFrame(x=1, y=2) + @test occursin("Error adding value to column :y", String(take!(buf))) df = DataFrame() @test push!(df, (a=1, b=true)) === df @@ -237,12 +267,21 @@ end df.a = [1,2,3] df.b = df.a dfc = copy(df) - @test_throws AssertionError push!(df, [1,2]) + with_logger(sl) do + @test_throws AssertionError push!(df, [1,2]) + end @test df == dfc - @test_throws AssertionError push!(df, (a=1,b=2)) + @test occursin("Error adding value to column :a", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError push!(df, (a=1,b=2)) + end @test df == dfc - @test_throws AssertionError push!(df, Dict(:a=>1, :b=>2)) + @test occursin("Error adding value to column :a", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError push!(df, Dict(:a=>1, :b=>2)) + end @test df == dfc + @test occursin("Error adding value to column :a", String(take!(buf))) @test_throws AssertionError push!(df, df[1, :]) @test df == dfc @test_throws AssertionError push!(df, dfc[1, :]) @@ -253,12 +292,21 @@ end df.b = df.a df.c = [1,2,3,4] dfc = copy(df) - @test_throws AssertionError push!(df, [1,2,3]) + with_logger(sl) do + @test_throws AssertionError push!(df, [1,2,3]) + end @test df == dfc - @test_throws AssertionError push!(df, (a=1,b=2,c=3)) + @test occursin("Error adding value to column :a", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError push!(df, (a=1,b=2,c=3)) + end @test df == dfc - @test_throws AssertionError push!(df, Dict(:a=>1, :b=>2, :c=>3)) + @test occursin("Error adding value to column :a", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError push!(df, Dict(:a=>1, :b=>2, :c=>3)) + end @test df == dfc + @test occursin("Error adding value to column :a", String(take!(buf))) @test_throws AssertionError push!(df, df[1, :]) @test df == dfc @test_throws AssertionError push!(df, dfc[1, :]) @@ -904,13 +952,21 @@ end end @testset "append!" begin + buf = IOBuffer() + sl = SimpleLogger(buf) df = DataFrame(A = 1:2, B = 1:2) df2 = DataFrame(A = 1:4, B = 1:4) @test append!(df, DataFrame(A = 3:4, B = [3.0, 4.0])) == df2 - @test_throws InexactError append!(df, DataFrame(A = 3:4, B = [3.5, 4.5])) + with_logger(sl) do + @test_throws InexactError append!(df, DataFrame(A = 3:4, B = [3.5, 4.5])) + end @test df == df2 - @test_throws MethodError append!(df, DataFrame(A = 3:4, B = ["a", "b"])) + @test occursin("Error adding value to column B", String(take!(buf))) + with_logger(sl) do + @test_throws MethodError append!(df, DataFrame(A = 3:4, B = ["a", "b"])) + end @test df == df2 + @test occursin("Error adding value to column B", String(take!(buf))) @test_throws ArgumentError append!(df, DataFrame(A = 1:4, C = 1:4)) @test df == df2 @@ -929,16 +985,22 @@ end df.a = [1,2,3] df.b = df.a dfc = copy(df) - @test_throws AssertionError append!(df, dfc) + with_logger(sl) do + @test_throws AssertionError append!(df, dfc) + end @test df == dfc + @test occursin("Error adding value to column a", String(take!(buf))) df = DataFrame() df.a = [1,2,3,4] df.b = df.a df.c = [1,2,3,4] dfc = copy(df) - @test_throws AssertionError append!(df, dfc) + with_logger(sl) do + @test_throws AssertionError append!(df, dfc) + end @test df == dfc + @test occursin("Error adding value to column a", String(take!(buf))) names!(df, [:a, :b, :z]) @test_throws ArgumentError append!(df, dfc) From 49b05265e7df9bf9b283276b67c57c30e42500f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 14 Aug 2019 08:27:52 +0200 Subject: [PATCH 07/30] fix typo --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index e04192a52b..cd3b40c3dd 100644 --- a/Project.toml +++ b/Project.toml @@ -29,7 +29,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["DataStructures", "DataValues", "Dates", "LaTeXStrings", "Logging", Random", "Test"] +test = ["DataStructures", "DataValues", "Dates", "LaTeXStrings", "Logging", "Random", "Test"] [compat] julia = "1" From d41d61de7267efd55940cf23ba7b940c36d48c55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 14 Aug 2019 08:35:56 +0200 Subject: [PATCH 08/30] handle logging warnings in unstack --- test/reshape.jl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/reshape.jl b/test/reshape.jl index 78094ab555..f6c298d29c 100644 --- a/test/reshape.jl +++ b/test/reshape.jl @@ -1,6 +1,6 @@ module TestReshape -using Test, DataFrames, Random +using Test, DataFrames, Random, Logging const ≅ = isequal @testset "the output of unstack" begin @@ -145,9 +145,10 @@ end variable=["a", "b", "a", "b"], value=[3, 4, 5, 6]) @test_logs (:warn, "Duplicate entries in unstack at row 3 for key 1 and variable a.") unstack(df, :id, :variable, :value) @test_logs (:warn, "Duplicate entries in unstack at row 3 for key (1, 1) and variable a.") unstack(df, :variable, :value) - a = unstack(df, :id, :variable, :value) + a, b = with_logger(NullLogger()) do + unstack(df, :id, :variable, :value), unstack(df, :variable, :value) + end @test a ≅ DataFrame(id = [1, 2], a = [5, missing], b = [missing, 6]) - b = unstack(df, :variable, :value) @test b ≅ DataFrame(id = [1, 2], id2 = [1, 2], a = [5, missing], b = [missing, 6]) df = DataFrame(id=1:2, variable=["a", "b"], value=3:4) @@ -167,7 +168,9 @@ end variable=["a", "b", missing, "a", "b", "missing", "a", "b", "missing"], value=[missing, 2.0, 3.0, 4.0, 5.0, missing, 7.0, missing, 9.0]) @test_logs (:warn, "Missing value in variable variable at row 3. Skipping.") unstack(df, :variable, :value) - udf = unstack(df, :variable, :value) + udf = with_logger(NullLogger()) do + unstack(df, :variable, :value) + end @test names(udf) == [:id, :a, :b, :missing] @test udf[!, :missing] ≅ [missing, 9.0, missing] df = DataFrame(id=[1, 1, 1, missing, missing, missing, 2, 2, 2], @@ -175,7 +178,9 @@ end variable=["a", "b", missing, "a", "b", "missing", "a", "b", "missing"], value=[missing, 2.0, 3.0, 4.0, 5.0, missing, 7.0, missing, 9.0]) @test_logs (:warn, "Missing value in variable variable at row 3. Skipping.") unstack(df, 3, 4) - udf = unstack(df, 3, 4) + udf = with_logger(NullLogger()) do + unstack(df, 3, 4) + end @test names(udf) == [:id, :id2, :a, :b, :missing] @test udf[!, :missing] ≅ [missing, 9.0, missing] end From 6ba679cb347a7a945fed9c1ef3d593bead0b1b85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 14 Aug 2019 09:11:47 +0200 Subject: [PATCH 09/30] fix method ambiguities --- src/dataframe/dataframe.jl | 2 +- src/deprecated.jl | 47 +++++++++++++++++++++++++++++++++++--- test/deprecated.jl | 12 +--------- test/indexing.jl | 28 +++++++++++++++++++++++ 4 files changed, 74 insertions(+), 15 deletions(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 6e38419156..ad2ea54087 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -526,9 +526,9 @@ function Base.setindex!(df::DataFrame, try x[row_inds] = v catch + insert_multiple_entries!(df, v, row_inds, col_ind) Base.depwarn("implicit vector broadcasting in setindex! is deprecated; " * "write `df[row_inds, col_ind] .= v` instead", :setindex!) - insert_multiple_entries!(df, v, row_inds, col_ind) end return df end diff --git a/src/deprecated.jl b/src/deprecated.jl index 2c385c815e..2b630ed0e5 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1355,9 +1355,6 @@ import Base: map @deprecate map(f::Function, sdf::SubDataFrame) f(sdf) @deprecate map(f::Union{Function,Type}, dfc::DataFrameColumns{<:AbstractDataFrame, Pair{Symbol, AbstractVector}}) mapcols(f, dfc.df) -import Base: length -@deprecate length(df::AbstractDataFrame) size(df, 2) - @deprecate head(df::AbstractDataFrame) first(df, 6) @deprecate tail(df::AbstractDataFrame) last(df, 6) @deprecate head(df::AbstractDataFrame, n::Integer) first(df, n) @@ -1668,4 +1665,48 @@ function Base.setindex!(df::DataFrame, return df end +function Base.setindex!(df::DataFrame, + v, + row_ind::Integer, + col_inds::Colon) + idxs = index(df)[col_inds] + if length(v) != length(idxs) + throw(DimensionMismatch("$(length(idxs)) columns were selected and the assigned" * + " value contains $(length(v)) elements")) + end + for (i, x) in enumerate(v) + df[row_ind, i] = x + end + return df +end + +function Base.setindex!(df::DataFrame, + new_df::AbstractDataFrame, + row_inds::Union{AbstractVector, Not, Colon}, + col_inds::Colon) + idxs = index(df)[col_inds] + if view(_names(df), idxs) != _names(new_df) + Base.depwarn("in the future column names in source and target will have to match", :setindex!) + end + for (j, col) in enumerate(idxs) + df[row_inds, col] = new_df[!, j] + end + return df +end + +function Base.setindex!(df::DataFrame, + v::AbstractVector, + row_inds::Colon, + col_ind::ColumnIndex) + x = df[!, col_ind] + try + x[row_inds] = v + catch + insert_multiple_entries!(df, v, 1:nrow(df), col_ind) + Base.depwarn("implicit vector broadcasting in setindex! is deprecated; " * + "write `df[row_inds, col_ind] .= v` instead", :setindex!) + end + return df +end + ##### END: duplicate methods diff --git a/test/deprecated.jl b/test/deprecated.jl index 71e36afdb2..c86c998e19 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -349,16 +349,9 @@ end end @testset "old setindex! tests" begin - missing_df = DataFrame() - df = DataFrame(Matrix{Int}(undef, 4, 3)) - - # Assignment of rows - df[1, :] = df[1:1, :] - df[1:2, :] = df[1:2, :] - df[[true,false,false,true], :] = df[2:3, :] + df = DataFrame(reshape(1:12, 4, :)) # Scalar broadcasting assignment of rows - df[1, :] = 1 df[1:2, :] = 1 df[[true,false,false,true], :] = 3 @@ -366,9 +359,6 @@ end df[1:2, :] = [2,3] df[[true,false,false,true], :] = [2,3] - # Assignment of columns - df[:, 2] = ones(4) - # Broadcasting assignment of columns df[:, 1] = 1 df[:x3] = 2 diff --git a/test/indexing.jl b/test/indexing.jl index 287e2dc9dc..4c71e8990a 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -1059,4 +1059,32 @@ end # TODO: add these tests after deprecation period. Same issues as with DataFrame case end +@testset "old setindex! tests adjusted to new rules" begin + df = DataFrame(reshape(1:12, 4, :)) + + @test_throws MethodError df[1, :] = df[1:1, :] + + df[1:2, :] = df[3:4, :] + @test df == DataFrame([3 7 11 + 4 8 12 + 3 7 11 + 4 8 12]) + + df[[true,false,true,false], :] = df[[2,4], :] + @test df == DataFrame([4 8 12 + 4 8 12 + 4 8 12 + 4 8 12]) + + @test_throws DimensionMismatch df[1, :] = 1 + + df[:, 2] = ones(4) + @test df == DataFrame([4 1 12 + 4 1 12 + 4 1 12 + 4 1 12]) + + @test_throws InexactError df[:, 2] = fill(1.5, 4) +end + end # module From 865fbf657b664caaa3f5e4326737eabb7506c353 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 14 Aug 2019 09:35:34 +0200 Subject: [PATCH 10/30] disable logging for test/deprecated.jl --- test/deprecated.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/deprecated.jl b/test/deprecated.jl index c86c998e19..f5dc794072 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -1,10 +1,12 @@ module TestDeprecated -using Test, DataFrames, Random +using Test, DataFrames, Random, Logging import DataFrames: identifier const ≅ = isequal +old_logger = global_logger(NullLogger()) + # old sort(df; cols=...) syntax df = DataFrame(a=[1, 3, 2], b=[6, 5, 4]) @test sort(df; cols=[:a, :b]) == DataFrame(a=[1, 2, 3], b=[6, 4, 5]) @@ -483,4 +485,6 @@ end end end +global_logger(old_logger) + end # module From 69377c1bbb6067963d028d0f9dba80f35dbbb756 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 14 Aug 2019 14:03:25 +0200 Subject: [PATCH 11/30] switch to @eval for defining methods --- src/dataframe/dataframe.jl | 156 +++++++++++++++++-------------- src/dataframerow/dataframerow.jl | 46 ++++----- src/deprecated.jl | 56 ++--------- test/indexing.jl | 36 +++++++ 4 files changed, 151 insertions(+), 143 deletions(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index ad2ea54087..3ff5dd61c6 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -502,96 +502,110 @@ end # the method for value of type DataFrameRow, AbstractDict and NamedTuple # is defined in dataframerow.jl -function Base.setindex!(df::DataFrame, - v, - row_ind::Integer, - col_inds::Union{AbstractVector, Regex, Not, Between, All, Colon}) - idxs = index(df)[col_inds] - if length(v) != length(idxs) - throw(DimensionMismatch("$(length(idxs)) columns were selected and the assigned" * - " value contains $(length(v)) elements")) - end - for (i, x) in enumerate(v) - df[row_ind, i] = x +for T in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) + @eval function Base.setindex!(df::DataFrame, + v, + row_ind::Integer, + col_inds::$(T)) + idxs = index(df)[col_inds] + if length(v) != length(idxs) + throw(DimensionMismatch("$(length(idxs)) columns were selected and the assigned" * + " value contains $(length(v)) elements")) + end + for (i, x) in enumerate(v) + df[row_ind, i] = x + end + return df end - return df end # df[MultiRowIndex, SingleColumnIndex] = AbstractVector -function Base.setindex!(df::DataFrame, - v::AbstractVector, - row_inds::Union{AbstractVector, Not, Colon}, - col_ind::ColumnIndex) - x = df[!, col_ind] - try - x[row_inds] = v - catch - insert_multiple_entries!(df, v, row_inds, col_ind) - Base.depwarn("implicit vector broadcasting in setindex! is deprecated; " * - "write `df[row_inds, col_ind] .= v` instead", :setindex!) +for T in (:AbstractVector, :Not, :Colon) + @eval function Base.setindex!(df::DataFrame, + v::AbstractVector, + row_inds::$(T), + col_ind::ColumnIndex) + x = df[!, col_ind] + try + x[row_inds] = v + catch + insert_multiple_entries!(df, v, axes(df, 1)[row_inds], col_ind) + Base.depwarn("implicit vector broadcasting in setindex! is deprecated; " * + "write `df[row_inds, col_ind] .= v` instead", :setindex!) + end + return df end - return df end # df[MultiRowIndex, MultiColumnIndex] = AbstractDataFrame -function Base.setindex!(df::DataFrame, - new_df::AbstractDataFrame, - row_inds::Union{AbstractVector, Not, Colon}, - col_inds::Union{AbstractVector, Regex, Not, Between, All, Colon}) - idxs = index(df)[col_inds] - if view(_names(df), idxs) != _names(new_df) - Base.depwarn("in the future column names in source and target will have to match", :setindex!) - end - for (j, col) in enumerate(idxs) - df[row_inds, col] = new_df[!, j] +for T1 in (:AbstractVector, :Not, :Colon), + T2 in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) + @eval function Base.setindex!(df::DataFrame, + new_df::AbstractDataFrame, + row_inds::$(T1), + col_inds::$(T2)) + idxs = index(df)[col_inds] + for (j, col) in enumerate(idxs) + df[row_inds, col] = new_df[!, j] + end + if view(_names(df), idxs) != _names(new_df) + Base.depwarn("in the future column names in source and target will have to match", :setindex!) + end + return df end - return df end -function Base.setindex!(df::DataFrame, - new_df::AbstractDataFrame, - row_inds::typeof(!), - col_inds::Union{AbstractVector, Regex, Not, Between, All, Colon}) - idxs = index(df)[col_inds] - if view(_names(df), idxs) != _names(new_df) - throw(ArgumentError("Column names in source and target data frames do not match")) - end - for (j, col) in enumerate(idxs) - # make sure we make a copy on assignment - df[!, col] = new_df[:, j] +for T in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) + @eval function Base.setindex!(df::DataFrame, + new_df::AbstractDataFrame, + row_inds::typeof(!), + col_inds::$(T)) + idxs = index(df)[col_inds] + if view(_names(df), idxs) != _names(new_df) + throw(ArgumentError("Column names in source and target data frames do not match")) + end + for (j, col) in enumerate(idxs) + # make sure we make a copy on assignment + df[!, col] = new_df[:, j] + end + return df end - return df end # df[MultiRowIndex, MultiColumnIndex] = AbstractMatrix -function Base.setindex!(df::DataFrame, - mx::AbstractMatrix, - row_inds::Union{AbstractVector, Not, Colon}, - col_inds::Union{AbstractVector, Regex, Not, Between, All, Colon}) - idxs = index(df)[col_inds] - if size(mx, 2) != length(idxs) - throw(DimensionMismatch("number of selected columns ($(length(idxs))) and number of columns in" * - " matrix ($(size(mx, 2))) do not match")) - end - for (j, col) in enumerate(idxs) - df[row_inds, col] = view(mx, :, j) +for T1 in (:AbstractVector, :Not, :Colon), + T2 in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) + @eval function Base.setindex!(df::DataFrame, + mx::AbstractMatrix, + row_inds::$(T1), + col_inds::$(T2)) + idxs = index(df)[col_inds] + if size(mx, 2) != length(idxs) + throw(DimensionMismatch("number of selected columns ($(length(idxs))) and number of columns in" * + " matrix ($(size(mx, 2))) do not match")) + end + for (j, col) in enumerate(idxs) + df[row_inds, col] = view(mx, :, j) + end + return df end - return df end -function Base.setindex!(df::DataFrame, - mx::AbstractMatrix, - row_inds::typeof(!), - col_inds::Union{AbstractVector, Regex, Not, Between, All, Colon}) - idxs = index(df)[col_inds] - if size(mx, 2) != length(idxs) - throw(DimensionMismatch("number of selected columns ($(length(idxs))) and number of columns in" * - " matrix ($(size(mx, 2))) do not match")) - end - for (j, col) in enumerate(idxs) - df[!, col] = mx[:, j] +for T in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) + @eval function Base.setindex!(df::DataFrame, + mx::AbstractMatrix, + row_inds::typeof(!), + col_inds::$(T)) + idxs = index(df)[col_inds] + if size(mx, 2) != length(idxs) + throw(DimensionMismatch("number of selected columns ($(length(idxs))) and number of columns in" * + " matrix ($(size(mx, 2))) do not match")) + end + for (j, col) in enumerate(idxs) + df[!, col] = mx[:, j] + end + return df end - return df end ############################################################################## diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl index d128b52b79..7a2b9920c8 100644 --- a/src/dataframerow/dataframerow.jl +++ b/src/dataframerow/dataframerow.jl @@ -98,32 +98,34 @@ Base.@propagate_inbounds Base.getindex(r::DataFrameRow, idxs::Union{AbstractVect DataFrameRow(parent(r), row(r), parentcols(index(r), idxs)) Base.@propagate_inbounds Base.getindex(r::DataFrameRow, ::Colon) = r -function Base.setindex!(df::DataFrame, - v::Union{DataFrameRow, NamedTuple, AbstractDict}, - row_ind::Integer, - col_inds::Union{AbstractVector, Regex, Not, Between, All, Colon}) - idxs = index(df)[col_inds] - if length(v) != length(idxs) - throw(DimensionMismatch("$(length(idxs)) columns were selected but the assigned" * - " value contains $(length(v)) elements")) - end +for T in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) + @eval function Base.setindex!(df::DataFrame, + v::Union{DataFrameRow, NamedTuple, AbstractDict}, + row_ind::Integer, + col_inds::$(T)) + idxs = index(df)[col_inds] + if length(v) != length(idxs) + throw(DimensionMismatch("$(length(idxs)) columns were selected but the assigned" * + " value contains $(length(v)) elements")) + end - if !(v isa AbstractDict || all(((a, b),) -> a == b, zip(view(_names(df), idxs), keys(v)))) - mismatched = findall(view(_names(df), idxs) .!= collect(keys(v))) - throw(ArgumentError("Selected column names do not match the names in assigned value in" * - " positions $(join(mismatched, ", ", " and "))")) - end - if v isa AbstractDict - for n in view(_names(df), idxs) - if !haskey(v, n) - throw(ArgumentError("Column $n not found in source dictionary")) + if !(v isa AbstractDict || all(((a, b),) -> a == b, zip(view(_names(df), idxs), keys(v)))) + mismatched = findall(view(_names(df), idxs) .!= collect(keys(v))) + throw(ArgumentError("Selected column names do not match the names in assigned value in" * + " positions $(join(mismatched, ", ", " and "))")) + end + if v isa AbstractDict + for n in view(_names(df), idxs) + if !haskey(v, n) + throw(ArgumentError("Column $n not found in source dictionary")) + end end end + for (col, val) in pairs(v) + df[row_ind, col] = val + end + return df end - for (col, val) in pairs(v) - df[row_ind, col] = val - end - return df end Base.@propagate_inbounds function Base.setindex!(r::DataFrameRow, value::Any, idx) diff --git a/src/deprecated.jl b/src/deprecated.jl index 2b630ed0e5..7586ec4e66 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1584,12 +1584,12 @@ function Base.setindex!(df::DataFrame, row_inds::AbstractVector{<:Integer}, col_inds::AbstractVector{<:ColumnIndex}) idxs = index(df)[col_inds] - if names(df)[idxs] != names(new_df) - Base.depwarn("in the future column names in source and target will have to match", :setindex!) - end for (j, col) in enumerate(idxs) df[row_inds, col] = new_df[!, j] end + if names(df)[idxs] != names(new_df) + Base.depwarn("in the future column names in source and target will have to match", :setindex!) + end return df end @@ -1598,12 +1598,12 @@ function Base.setindex!(df::DataFrame, row_inds::AbstractVector{Bool}, col_inds::AbstractVector{<:ColumnIndex}) idxs = index(df)[col_inds] - if names(df)[idxs] != names(new_df) - Base.depwarn("in the future column names in source and target will have to match", :setindex!) - end for (j, col) in enumerate(idxs) df[row_inds, col] = new_df[!, j] end + if names(df)[idxs] != names(new_df) + Base.depwarn("in the future column names in source and target will have to match", :setindex!) + end return df end @@ -1665,48 +1665,4 @@ function Base.setindex!(df::DataFrame, return df end -function Base.setindex!(df::DataFrame, - v, - row_ind::Integer, - col_inds::Colon) - idxs = index(df)[col_inds] - if length(v) != length(idxs) - throw(DimensionMismatch("$(length(idxs)) columns were selected and the assigned" * - " value contains $(length(v)) elements")) - end - for (i, x) in enumerate(v) - df[row_ind, i] = x - end - return df -end - -function Base.setindex!(df::DataFrame, - new_df::AbstractDataFrame, - row_inds::Union{AbstractVector, Not, Colon}, - col_inds::Colon) - idxs = index(df)[col_inds] - if view(_names(df), idxs) != _names(new_df) - Base.depwarn("in the future column names in source and target will have to match", :setindex!) - end - for (j, col) in enumerate(idxs) - df[row_inds, col] = new_df[!, j] - end - return df -end - -function Base.setindex!(df::DataFrame, - v::AbstractVector, - row_inds::Colon, - col_ind::ColumnIndex) - x = df[!, col_ind] - try - x[row_inds] = v - catch - insert_multiple_entries!(df, v, 1:nrow(df), col_ind) - Base.depwarn("implicit vector broadcasting in setindex! is deprecated; " * - "write `df[row_inds, col_ind] .= v` instead", :setindex!) - end - return df -end - ##### END: duplicate methods diff --git a/test/indexing.jl b/test/indexing.jl index 4c71e8990a..d9f0a151d7 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -1059,6 +1059,42 @@ end # TODO: add these tests after deprecation period. Same issues as with DataFrame case end +@testset "setindex! with ! or : and multiple cols" begin + df = DataFrame(fill("x", 3, 4)) + df[!, :] = DataFrame(reshape(1:12, 3, :)) + @test df == DataFrame(reshape(1:12, 3, :)) + @test_throws ArgumentError df[!, :] = DataFrame(fill(1, 3, 4))[:, [3,2,1]] + @test_throws ArgumentError df[!, :] = DataFrame(fill(1, 3, 4))[1:2, :] + + df = DataFrame(fill("x", 3, 4)) + df[!, Not(4)] = DataFrame(reshape(1:12, 3, :))[:, 1:3] + @test df[:, 1:3] == DataFrame(reshape(1:12, 3, :))[:, 1:3] + + df = DataFrame(fill("x", 3, 4)) + df[!, :] = reshape(1:12, 3, :) + @test df == DataFrame(reshape(1:12, 3, :)) + + df = DataFrame(fill("x", 3, 4)) + df[!, Not(4)] = reshape(1:12, 3, :)[:, 1:3] + @test df[:, 1:3] == DataFrame(reshape(1:12, 3, :))[:, 1:3] + + dfv = view(df, :, :) + @test_throws ArgumentError dfv[!, :] = DataFrame(reshape(1:12, 3, :)) + @test_throws ArgumentError dfv[!, :] = reshape(1:12, 3, :) + + for rows in [:, 1:3], cols in [:, r"", Not(r"xx"), 1:4] + df = DataFrame(ones(3,4)) + df[rows, cols] = DataFrame(reshape(1:12, 3, :)) + @test df == DataFrame(reshape(1:12, 3, :)) + end + + for rows in [:, 1:3], cols in [:, r"", Not(r"xx"), 1:4] + df = DataFrame(ones(3,4)) + df[rows, cols] = reshape(1:12, 3, :) + @test df == DataFrame(reshape(1:12, 3, :)) + end +end + @testset "old setindex! tests adjusted to new rules" begin df = DataFrame(reshape(1:12, 4, :)) From 1e9d8aff9a24b8e04ca1d5673b348571de7950b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 14 Aug 2019 15:00:33 +0200 Subject: [PATCH 12/30] clean up deprecated methods --- src/deprecated.jl | 152 +++------------------------------------------- 1 file changed, 9 insertions(+), 143 deletions(-) diff --git a/src/deprecated.jl b/src/deprecated.jl index 7586ec4e66..cd180d5cc4 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1466,20 +1466,10 @@ end @deprecate setindex!(df::DataFrame, new_df::DataFrame, row_ind::Integer, col_inds::AbstractVector{<:ColumnIndex}) (foreach(c -> (df[row_ind, c] = new_df[1, c]), col_inds); df) -# df[SingleRowIndex, MultiColumnIndex] = Single Item -@deprecate setindex!(df::DataFrame, v::Any, row_ind::Integer, - col_inds::AbstractVector{<:ColumnIndex}) (df[row_ind, col_inds] .= Ref(v); df) - # df[MultiRowIndex, SingleColumnIndex] = Single Item function Base.setindex!(df::DataFrame, v::Any, - row_inds::AbstractVector{Bool}, - col_ind::ColumnIndex) - setindex!(df, v, findall(row_inds), col_ind) -end -function Base.setindex!(df::DataFrame, - v::Any, - row_inds::AbstractVector{<:Integer}, + row_inds::AbstractVector, col_ind::ColumnIndex) insert_multiple_entries!(df, v, row_inds, col_ind) Base.depwarn("implicit broadcasting in setindex! is deprecated; " * @@ -1490,27 +1480,10 @@ end # df[MultiRowIndex, MultiColumnIndex] = AbstractVector function Base.setindex!(df::DataFrame, v::AbstractVector, - row_inds::AbstractVector{Bool}, - col_inds::AbstractVector{Bool}) - setindex!(df, v, findall(row_inds), findall(col_inds)) -end -function Base.setindex!(df::DataFrame, - v::AbstractVector, - row_inds::AbstractVector{Bool}, - col_inds::AbstractVector{<:ColumnIndex}) - setindex!(df, v, findall(row_inds), col_inds) -end -function Base.setindex!(df::DataFrame, - v::AbstractVector, - row_inds::AbstractVector{<:Integer}, - col_inds::AbstractVector{Bool}) - setindex!(df, v, row_inds, findall(col_inds)) -end -function Base.setindex!(df::DataFrame, - v::AbstractVector, - row_inds::AbstractVector{<:Integer}, - col_inds::AbstractVector{<:ColumnIndex}) - for col_ind in col_inds + row_inds::AbstractVector, + col_inds::AbstractVector) + col_inds_norm = index(df)[col_inds] + for col_ind in col_inds_norm insert_multiple_entries!(df, v, row_inds, col_ind) end Base.depwarn("implicit vector broadcasting in setindex! is deprecated; " * @@ -1521,27 +1494,10 @@ end # df[MultiRowIndex, MultiColumnIndex] = Single Item function Base.setindex!(df::DataFrame, v::Any, - row_inds::AbstractVector{Bool}, - col_inds::AbstractVector{Bool}) - setindex!(df, v, findall(row_inds), findall(col_inds)) -end -function Base.setindex!(df::DataFrame, - v::Any, - row_inds::AbstractVector{Bool}, - col_inds::AbstractVector{<:ColumnIndex}) - setindex!(df, v, findall(row_inds), col_inds) -end -function Base.setindex!(df::DataFrame, - v::Any, - row_inds::AbstractVector{<:Integer}, - col_inds::AbstractVector{Bool}) - setindex!(df, v, row_inds, findall(col_inds)) -end -function Base.setindex!(df::DataFrame, - v::Any, - row_inds::AbstractVector{<:Integer}, - col_inds::AbstractVector{<:ColumnIndex}) - for col_ind in col_inds + row_inds::AbstractVector, + col_inds::AbstractVector) + col_inds_norm = index(df)[col_inds] + for col_ind in col_inds_norm insert_multiple_entries!(df, v, row_inds, col_ind) end Base.depwarn("implicit vector broadcasting in setindex! is deprecated; " * @@ -1576,93 +1532,3 @@ end import Base: setproperty! @deprecate setproperty!(df::DataFrame, col_ind::Symbol, v) (df[!, col_ind] .= v) @deprecate setproperty!(df::SubDataFrame, col_ind::Symbol, v) (df[:, col_ind] .= v) - -##### BEGIN: There methods duplicate functionality but are needed to resolve method call ambiuguities - -function Base.setindex!(df::DataFrame, - new_df::AbstractDataFrame, - row_inds::AbstractVector{<:Integer}, - col_inds::AbstractVector{<:ColumnIndex}) - idxs = index(df)[col_inds] - for (j, col) in enumerate(idxs) - df[row_inds, col] = new_df[!, j] - end - if names(df)[idxs] != names(new_df) - Base.depwarn("in the future column names in source and target will have to match", :setindex!) - end - return df -end - -function Base.setindex!(df::DataFrame, - new_df::AbstractDataFrame, - row_inds::AbstractVector{Bool}, - col_inds::AbstractVector{<:ColumnIndex}) - idxs = index(df)[col_inds] - for (j, col) in enumerate(idxs) - df[row_inds, col] = new_df[!, j] - end - if names(df)[idxs] != names(new_df) - Base.depwarn("in the future column names in source and target will have to match", :setindex!) - end - return df -end - -function Base.setindex!(df::DataFrame, - mx::AbstractMatrix, - row_inds::AbstractVector{<:Integer}, - col_inds::AbstractVector{<:ColumnIndex}) - idxs = index(df)[col_inds] - if size(mx, 2) != length(idxs) - throw(DimensionMismatch("number of selected columns ($(length(idxs))) and a" * - " matrix ($(size(mx, 2))) do not match")) - end - for (j, col) in enumerate(idxs) - df[row_inds, col] = view(mx, :, j) - end - return df -end - -function Base.setindex!(df::DataFrame, - mx::AbstractMatrix, - row_inds::AbstractVector{Bool}, - col_inds::AbstractVector{<:ColumnIndex}) - idxs = index(df)[col_inds] - if size(mx, 2) != length(idxs) - throw(DimensionMismatch("number of selected columns ($(length(idxs))) and a" * - " matrix ($(size(mx, 2))) do not match")) - end - for (j, col) in enumerate(idxs) - df[row_inds, col] = view(mx, :, j) - end - return df -end - -function Base.setindex!(df::DataFrame, - v::AbstractVector, - row_inds::AbstractVector{<:Integer}, - col_ind::ColumnIndex) - if length(v) == length(df[row_inds, col_ind]) - x = df[!, col_ind] - x[row_inds] = v - else - insert_multiple_entries!(df, v, row_inds, col_ind) - Base.depwarn("implicit vector broadcasting in setindex! is deprecated", :setindex!) - end - return df -end - -function Base.setindex!(df::DataFrame, - v::AbstractVector, - row_inds::AbstractVector{Bool}, - col_ind::ColumnIndex) - if length(v) == length(df[row_inds, col_ind]) - x = df[!, col_ind] - x[row_inds] = v - else - insert_multiple_entries!(df, v, row_inds, col_ind) - Base.depwarn("implicit vector broadcasting in setindex! is deprecated", :setindex!) - end - return df -end - -##### END: duplicate methods From 9ce08a595fd54a36a93bed285ba465ed74628fae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 14 Aug 2019 15:25:32 +0200 Subject: [PATCH 13/30] DataFrameRow assignment tests --- src/dataframerow/dataframerow.jl | 9 +- test/dataframe.jl | 20 ++- test/indexing.jl | 229 +++++++++++++++++++++++++++++-- 3 files changed, 225 insertions(+), 33 deletions(-) diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl index 7a2b9920c8..99aa27d07e 100644 --- a/src/dataframerow/dataframerow.jl +++ b/src/dataframerow/dataframerow.jl @@ -128,13 +128,8 @@ for T in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) end end -Base.@propagate_inbounds function Base.setindex!(r::DataFrameRow, value::Any, idx) - col = parentcols(index(r), idx) - if !(col isa Int) - Base.depwarn("implicit broadcasting in DataFrameRow assignment is deprecated", :setindex!) - end - setindex!(parent(r), value, row(r), col) -end +Base.@propagate_inbounds Base.setindex!(r::DataFrameRow, value, idx) = + setindex!(parent(r), value, row(r), parentcols(index(r), idx)) index(r::DataFrameRow) = getfield(r, :colindex) diff --git a/test/dataframe.jl b/test/dataframe.jl index 1fdd3d6200..7d0d0b5f3b 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -1717,19 +1717,16 @@ end @view df[!, All()] @view df[!, Between(1,2)] -# TODO: enable after setindex! rules update -# df[1, All()] = (a=1, b=2, c=3) -# df[1, Between(1,2)] = (a=1, b=2) + df[1, All()] = (a=1, b=2, c=3) + df[1, Between(1,2)] = (a=1, b=2) df[1:1, All()] = df df[1:1, Between(1,2)] = df[!, 1:2] -# TODO: enable after setindex! rules update -# df[:, All()] = df -# df[:, Between(1,2)] = df[!, 1:2] + df[:, All()] = df + df[:, Between(1,2)] = df[!, 1:2] df[1:1, All()] = Matrix(df) df[1:1, Between(1,2)] = Matrix(df[!, 1:2]) -# TODO: enable after setindex! rules update -# df[:, All()] = Matrix(df) -# df[:, Between(1,2)] = Matrix(df[!, 1:2]) + df[:, All()] = Matrix(df) + df[:, Between(1,2)] = Matrix(df[!, 1:2]) df2 = vcat(df, df) df2[Not(1), All()] = df @@ -1747,9 +1744,8 @@ end dfr = df[1, :] dfr[All()] dfr[Between(1,2)] -# TODO: enable after setindex! rules update -# dfr[All()] = (a=1, b=2, c=3) -# dfr[Between(1,2)] = (a=1, b=2) + dfr[All()] = (a=1, b=2, c=3) + dfr[Between(1,2)] = (a=1, b=2) @view dfr[All()] @view dfr[Between(1,2)] diff --git a/test/indexing.jl b/test/indexing.jl index d9f0a151d7..19542d5f52 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -827,18 +827,67 @@ end # `df[row, cols] = v` -> set row `row` of columns `cols` in-place; # the same as `dfr = df[row, cols]; dfr[:] = v` - # TODO: add these tests after deprecation period - # here is the example current behavior (that we have to keep) that disallows any tests: - # - # julia> df = DataFrame(a=[[1,2]],b=[[1,2]]); - # julia> dfr = df[1, :]; - # julia> dfr[:] = [10, 11]; - # julia> df - # 1×2 DataFrame - # │ Row │ a │ b │ - # │ │ Array… │ Array… │ - # ├─────┼──────────┼──────────┤ - # │ 1 │ [10, 11] │ [10, 11] │ + df = DataFrame(a=[[1,2]],b=[[1,2]]) + dfr = df[1, :]; + @test_throws MethodError dfr[:] = [10, 11] + @test df == DataFrame(a=[[1,2]],b=[[1,2]]) + @test_throws MethodError df[1, :] = [10, 11] + @test df == DataFrame(a=[[1,2]],b=[[1,2]]) + + df = DataFrame(a=1,b=2) + df[1, :] = [10, 11] + @test df == DataFrame(a=10,b=11) + df = DataFrame(a=1,b=2) + dfr = df[1, :] + dfr[:] = [10, 11] + @test df == DataFrame(a=10,b=11) + + df = DataFrame(a=1,b=2) + df[1, :] = (10, 11) + @test df == DataFrame(a=10,b=11) + df = DataFrame(a=1,b=2) + dfr = df[1, :] + dfr[:] = (10, 11) + @test df == DataFrame(a=10,b=11) + + @test_throws DimensionMismatch df[1, :] = [1, 2, 3] + @test_throws DimensionMismatch dfr[:] = [1, 2, 3] + + df = DataFrame(a=1,b=2) + df[1, :] = Dict(:a=>10, :b=>11) + @test df == DataFrame(a=10,b=11) + df = DataFrame(a=1,b=2) + @test_throws ArgumentError df[1, :] = Dict(:a=>10, :c=>11) + @test df == DataFrame(a=1,b=2) + df = DataFrame(a=1,b=2) + @test_throws DimensionMismatch df[1, :] = Dict(:a=>10, :b=>11, :c=>12) + @test df == DataFrame(a=1,b=2) + + df = DataFrame(a=1,b=2) + df[1, :] = (a=10, b=11) + @test df == DataFrame(a=10,b=11) + df = DataFrame(a=1,b=2) + @test_throws ArgumentError df[1, :] = (a=10, c=11) + @test df == DataFrame(a=1,b=2) + df = DataFrame(a=1,b=2) + @test_throws ArgumentError df[1, :] = (b=10, a=11) + @test df == DataFrame(a=1,b=2) + df = DataFrame(a=1,b=2) + @test_throws DimensionMismatch df[1, :] = (a=10, b=11, c=12) + @test df == DataFrame(a=1,b=2) + + df = DataFrame(a=1,b=2) + df[1, :] = DataFrame(a=10, b=11)[1, :] + @test df == DataFrame(a=10,b=11) + df = DataFrame(a=1,b=2) + @test_throws ArgumentError df[1, :] = DataFrame(a=10, c=11)[1, :] + @test df == DataFrame(a=1,b=2) + df = DataFrame(a=1,b=2) + @test_throws ArgumentError df[1, :] = DataFrame(b=10, a=11)[1, :] + @test df == DataFrame(a=1,b=2) + df = DataFrame(a=1,b=2) + @test_throws DimensionMismatch df[1, :] = DataFrame(a=10, b=11, c=12)[1, :] + @test df == DataFrame(a=1,b=2) # `df[rows, col] = v` -> set rows `rows` of column `col` in-place; `v` must be an `AbstractVector` @@ -950,7 +999,67 @@ end # `sdf[row, cols] = v` -> the same as `dfr = df[row, cols]; dfr[:] = v` in-place; - # TODO: add these tests after deprecation period. Same issues as with DataFrame case + df = view(DataFrame(a=[[1,2]],b=[[1,2]]), :, :) + dfr = df[1, :]; + @test_throws MethodError dfr[:] = [10, 11] + @test df == DataFrame(a=[[1,2]],b=[[1,2]]) + @test_throws MethodError df[1, :] = [10, 11] + @test df == DataFrame(a=[[1,2]],b=[[1,2]]) + + df = view(DataFrame(a=1,b=2), :, :) + df[1, :] = [10, 11] + @test df == DataFrame(a=10,b=11) + df = view(DataFrame(a=1,b=2), :, :) + dfr = df[1, :] + dfr[:] = [10, 11] + @test df == DataFrame(a=10,b=11) + + df = view(DataFrame(a=1,b=2), :, :) + df[1, :] = (10, 11) + @test df == DataFrame(a=10,b=11) + df = view(DataFrame(a=1,b=2), :, :) + dfr = df[1, :] + dfr[:] = (10, 11) + @test df == DataFrame(a=10,b=11) + + @test_throws DimensionMismatch df[1, :] = [1, 2, 3] + @test_throws DimensionMismatch dfr[:] = [1, 2, 3] + + df = view(DataFrame(a=1,b=2), :, :) + df[1, :] = Dict(:a=>10, :b=>11) + @test df == DataFrame(a=10,b=11) + df = view(DataFrame(a=1,b=2), :, :) + @test_throws ArgumentError df[1, :] = Dict(:a=>10, :c=>11) + @test df == DataFrame(a=1,b=2) + df = view(DataFrame(a=1,b=2), :, :) + @test_throws DimensionMismatch df[1, :] = Dict(:a=>10, :b=>11, :c=>12) + @test df == DataFrame(a=1,b=2) + + df = view(DataFrame(a=1,b=2), :, :) + df[1, :] = (a=10, b=11) + @test df == DataFrame(a=10,b=11) + df = view(DataFrame(a=1,b=2), :, :) + @test_throws ArgumentError df[1, :] = (a=10, c=11) + @test df == DataFrame(a=1,b=2) + df = view(DataFrame(a=1,b=2), :, :) + @test_throws ArgumentError df[1, :] = (b=10, a=11) + @test df == DataFrame(a=1,b=2) + df = view(DataFrame(a=1,b=2), :, :) + @test_throws DimensionMismatch df[1, :] = (a=10, b=11, c=12) + @test df == DataFrame(a=1,b=2) + + df = view(DataFrame(a=1,b=2), :, :) + df[1, :] = DataFrame(a=10, b=11)[1, :] + @test df == DataFrame(a=10,b=11) + df = view(DataFrame(a=1,b=2), :, :) + @test_throws ArgumentError df[1, :] = DataFrame(a=10, c=11)[1, :] + @test df == DataFrame(a=1,b=2) + df = view(DataFrame(a=1,b=2), :, :) + @test_throws ArgumentError df[1, :] = DataFrame(b=10, a=11)[1, :] + @test df == DataFrame(a=1,b=2) + df = view(DataFrame(a=1,b=2), :, :) + @test_throws DimensionMismatch df[1, :] = DataFrame(a=10, b=11, c=12)[1, :] + @test df == DataFrame(a=1,b=2) # `sdf[rows, col] = v` -> set rows `rows` of column `col`, in-place; `v` must be an abstract vector; @@ -1056,7 +1165,99 @@ end # `v` can be an `AbstractVector` or `v` can be a `NamedTuple` or `DataFrameRow` # when column names must match; - # TODO: add these tests after deprecation period. Same issues as with DataFrame case + df = DataFrame(a=1,b=2) + dfr = df[1, :] + dfr[:] = Dict(:a=>10, :b=>11) + @test df == DataFrame(a=10,b=11) + df = DataFrame(a=1,b=2) + dfr = df[1, :] + @test_throws ArgumentError dfr[:] = Dict(:a=>10, :c=>11) + @test df == DataFrame(a=1,b=2) + df = DataFrame(a=1,b=2) + dfr = df[1, :] + @test_throws DimensionMismatch dfr[:] = Dict(:a=>10, :b=>11, :c=>12) + @test df == DataFrame(a=1,b=2) + + df = DataFrame(a=1,b=2) + dfr = df[1, :] + dfr[:] = (a=10, b=11) + @test df == DataFrame(a=10,b=11) + df = DataFrame(a=1,b=2) + dfr = df[1, :] + @test_throws ArgumentError dfr[:] = (a=10, c=11) + @test df == DataFrame(a=1,b=2) + df = DataFrame(a=1,b=2) + dfr = df[1, :] + @test_throws ArgumentError dfr[:] = (b=10, a=11) + @test df == DataFrame(a=1,b=2) + df = DataFrame(a=1,b=2) + dfr = df[1, :] + @test_throws DimensionMismatch dfr[:] = (a=10, b=11, c=12) + @test df == DataFrame(a=1,b=2) + + df = DataFrame(a=1,b=2) + dfr = df[1, :] + dfr[:] = DataFrame(a=10, b=11)[1, :] + @test df == DataFrame(a=10,b=11) + df = DataFrame(a=1,b=2) + dfr = df[1, :] + @test_throws ArgumentError dfr[:] = DataFrame(a=10, c=11)[1, :] + @test df == DataFrame(a=1,b=2) + df = DataFrame(a=1,b=2) + dfr = df[1, :] + @test_throws ArgumentError dfr[:] = DataFrame(b=10, a=11)[1, :] + @test df == DataFrame(a=1,b=2) + df = DataFrame(a=1,b=2) + dfr = df[1, :] + @test_throws DimensionMismatch dfr[:] = DataFrame(a=10, b=11, c=12)[1, :] + @test df == DataFrame(a=1,b=2) + + df = DataFrame(a=1,b=2, c=3) + dfr = df[1, :] + dfr[Not(3)] = Dict(:a=>10, :b=>11) + @test df == DataFrame(a=10,b=11, c=3) + df = DataFrame(a=1,b=2, c=3) + dfr = df[1, :] + @test_throws ArgumentError dfr[Not(3)] = Dict(:a=>10, :c=>11) + @test df == DataFrame(a=1,b=2, c=3) + df = DataFrame(a=1,b=2, c=3) + dfr = df[1, :] + @test_throws DimensionMismatch dfr[Not(3)] = Dict(:a=>10, :b=>11, :c=>12) + @test df == DataFrame(a=1,b=2, c=3) + + df = DataFrame(a=1,b=2, c=3) + dfr = df[1, :] + dfr[Not(3)] = (a=10, b=11) + @test df == DataFrame(a=10,b=11, c=3) + df = DataFrame(a=1,b=2, c=3) + dfr = df[1, :] + @test_throws ArgumentError dfr[Not(3)] = (a=10, c=11) + @test df == DataFrame(a=1,b=2, c=3) + df = DataFrame(a=1,b=2, c=3) + dfr = df[1, :] + @test_throws ArgumentError dfr[Not(3)] = (b=10, a=11) + @test df == DataFrame(a=1,b=2, c=3) + df = DataFrame(a=1,b=2, c=3) + dfr = df[1, :] + @test_throws DimensionMismatch dfr[Not(3)] = (a=10, b=11, c=12) + @test df == DataFrame(a=1,b=2, c=3) + + df = DataFrame(a=1,b=2, c=3) + dfr = df[1, :] + dfr[Not(3)] = DataFrame(a=10, b=11)[1, :] + @test df == DataFrame(a=10,b=11, c=3) + df = DataFrame(a=1,b=2, c=3) + dfr = df[1, :] + @test_throws ArgumentError dfr[Not(3)] = DataFrame(a=10, c=11)[1, :] + @test df == DataFrame(a=1,b=2, c=3) + df = DataFrame(a=1,b=2, c=3) + dfr = df[1, :] + @test_throws ArgumentError dfr[Not(3)] = DataFrame(b=10, a=11)[1, :] + @test df == DataFrame(a=1,b=2, c=3) + df = DataFrame(a=1,b=2, c=3) + dfr = df[1, :] + @test_throws DimensionMismatch dfr[Not(3)] = DataFrame(a=10, b=11, c=12)[1, :] + @test df == DataFrame(a=1,b=2, c=3) end @testset "setindex! with ! or : and multiple cols" begin From 17c1db3bfc7b16bea8ce8e1ffa2de054e8c03ce3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 14 Aug 2019 15:40:02 +0200 Subject: [PATCH 14/30] broadcasting tests --- test/broadcasting.jl | 65 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 7e38d7f44d..ccb0fb9096 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -1347,4 +1347,69 @@ end @test df == DataFrame(a=[[1000,1000,1000]]) end +@testset "broadcasting into df[!, cols]" begin + for selector in [1:2, Between(:x1, :x2), Not(r"xx"), [:x1, :x2]] + df = DataFrame(x1=1:3, x2=4:6) + df[!, selector] .= "a" + @test df == DataFrame(fill("a", 3, 2)) + @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6) + df[!, selector] .= Ref((a=1,b=2)) + @test df == DataFrame(fill((a=1,b=2), 3, 2)) + @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6) + df[!, selector] .= ["a" "b"] + @test df == DataFrame(["a" "b" + "a" "b" + "a" "b"]) + @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6) + df[!, selector] .= ["a", "b", "c"] + @test df == DataFrame(["a" "a" + "b" "b" + "c" "c"]) + @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6) + df[!, selector] .= categorical(["a"]) + @test df == DataFrame(["a" "a" + "a" "a" + "a" "a"]) + @test df.x1 isa CategoricalVector + @test df.x2 isa CategoricalVector + @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6) + df[!, selector] .= DataFrame(["a" "b"]) + @test df == DataFrame(["a" "b" + "a" "b" + "a" "b"]) + @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6) + df[!, selector] .= DataFrame(["a" "d" + "b" "e" + "c" "f"]) + @test df == DataFrame(["a" "d" + "b" "e" + "c" "f"]) + @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6) + df[!, selector] .= ["a" "d" + "b" "e" + "c" "f"] + @test df == DataFrame(["a" "d" + "b" "e" + "c" "f"]) + @test df.x1 !== df.x2 + end + + df = DataFrame(x1=1:3, x2=4:6) + @test_throws ArgumentError df[!, [:x1, :x3]] .= "a" +end + end # module From 374360c1affe24e6807e05aabd04da3984642390 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 14 Aug 2019 16:03:10 +0200 Subject: [PATCH 15/30] additional tests --- test/deprecated.jl | 1 - test/indexing.jl | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/test/deprecated.jl b/test/deprecated.jl index f5dc794072..35781ab253 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -371,7 +371,6 @@ end df[[true,false,false,true], 2:3] = df[1:2,1:2] # scalar broadcasting assignment of subtables - df[1, 1:2] = 3 df[1:2, 1:2] = 3 df[[true,false,false,true], 2:3] = 3 diff --git a/test/indexing.jl b/test/indexing.jl index 19542d5f52..c73981fc6f 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -1024,6 +1024,20 @@ end @test_throws DimensionMismatch df[1, :] = [1, 2, 3] @test_throws DimensionMismatch dfr[:] = [1, 2, 3] + @test_throws DimensionMismatch df[1, 1:2] = 3 + @test_throws DimensionMismatch dfr[:] = 3 + + # numbers are iterable + dfr[1:1] = 100 + @test df == DataFrame(a=100, b=11) + df[1, 1:1] = 1000 + @test df == DataFrame(a=1000, b=11) + + # so are strings + dfr[1:1] = "d" + @test df == DataFrame(a=100, b=11) + df[1, 1:1] = "e" + @test df == DataFrame(a=101, b=11) df = view(DataFrame(a=1,b=2), :, :) df[1, :] = Dict(:a=>10, :b=>11) From af66b6fd70f6ea7adc08d6435113816fd2f8c339 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 24 Aug 2019 18:08:13 +0200 Subject: [PATCH 16/30] Apply suggestions from code review Co-Authored-By: Milan Bouchet-Valat --- docs/src/lib/indexing.md | 12 ++++++------ src/dataframe/dataframe.jl | 6 +++--- src/dataframerow/dataframerow.jl | 2 +- src/other/broadcasting.jl | 2 +- test/broadcasting.jl | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index d4e71bef09..5ec98baa69 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -114,7 +114,7 @@ The following list specifies the behavior of `setindex!` operations depending on In particular a description explicitly mentions if the assignment is *in-place*. -Note that if `setindex!` operation throws an error the target data frame may be partially changed +Note that if a `setindex!` operation throws an error the target data frame may be partially changed so it is unsafe to use it afterwards. `setindex!` on `DataFrame`: @@ -131,7 +131,7 @@ so it is unsafe to use it afterwards. this is allowed if `ncol(df) == 0 || length(v) == nrow(df)`; * `df[!, cols] = v` -> replaces existing columns `cols` in data frame `df` with copying; `v` must be an `AbstractMatrix` or an `AbstractDataFrame` - (in this case column names must match); + (in the latter case column names must match); Note that only `df[!, col] = v` and `df.col = v` can be used to add a new column to a `DataFrame`. In particular as `df[:, col] = v` is an in-place operation it does not add a column `v` to a `DataFrame` if `col` is missing @@ -151,9 +151,9 @@ Note that `sdf[!, col] = v`, `sdf[!, cols] = v` and `sdf.col = v` are not allowe * `dfr[col] = v` -> set value of `col` in row `row` to `v` in-place; equivalent to `dfr.col = v` if `col` is a valid identifier; * `dfr[cols] = v` -> set values of entries in columns `cols` in `dfr` by elements of `v` in place; - `v` can be: 1) an iterable in which case it must have a number of elements equal to `length(dfr)`, - 2) an `AbstractDict`, when column names must match, - 3) a `NamedTuple` or `DataFrameRow` when column names and order must match; + `v` can be: 1) an iterable, in which case it must have a number of elements equal to `length(dfr)`, + 2) an `AbstractDict`, in which case column names must match, + 3) a `NamedTuple` or `DataFrameRow`, in which case column names and order must match; ## Broadcasting @@ -184,7 +184,7 @@ Additional rules: * in the `df[row, cols] .= v` syntaxes the assignment to `df` is performed in-place; * in the `df[rows, col] .= v` and `df[rows, cols] .= v` syntaxes the assignment to `df` is performed in-place; * in the `df[!, col] .= v` syntax column `col` is replaced by a freshly allocated vector; if `col` is `Symbol` and it is missing from `df` then a new column is added; the length of the column is always the value of `nrow(df)` before the assignment takes place; -* the `df[!, cols] .= v` replaces existing columns `cols` in data frame `df` with freshly allocated vectors; +* the `df[!, cols] .= v` syntax replaces existing columns `cols` in data frame `df` with freshly allocated vectors; * `df.col .= v` syntax is allowed and performs in-place assignment to an existing vector `df.col`. * in the `sdf[CartesianIndex(row, col)] .= v`, `sdf[row, col] .= v` and `sdf[row, cols] .= v` syntaxes the assignment to `sdf` is performed in-place; * in the `sdf[rows, col] .= v` and `sdf[rows, cols] .= v` syntaxes the assignment to `sdf` is performed in-place; diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 3ff5dd61c6..8534438c21 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -506,11 +506,11 @@ for T in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) @eval function Base.setindex!(df::DataFrame, v, row_ind::Integer, - col_inds::$(T)) + col_inds::$T) idxs = index(df)[col_inds] if length(v) != length(idxs) - throw(DimensionMismatch("$(length(idxs)) columns were selected and the assigned" * - " value contains $(length(v)) elements")) + throw(DimensionMismatch("$(length(idxs)) columns were selected but the assigned" * + " collection contains $(length(v)) elements")) end for (i, x) in enumerate(v) df[row_ind, i] = x diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl index 99aa27d07e..c844666752 100644 --- a/src/dataframerow/dataframerow.jl +++ b/src/dataframerow/dataframerow.jl @@ -117,7 +117,7 @@ for T in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) if v isa AbstractDict for n in view(_names(df), idxs) if !haskey(v, n) - throw(ArgumentError("Column $n not found in source dictionary")) + throw(ArgumentError("Column :$n not found in source dictionary")) end end end diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index db13090514..8ecc5505fa 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -227,7 +227,7 @@ end function Base.copyto!(crdf::ColReplaceDataFrame, bc::Base.Broadcast.Broadcasted) bcf = Base.Broadcast.flatten(bc) - colnames = unique([_names(x) for x in bcf.args if x isa AbstractDataFrame]) + colnames = unique!([_names(x) for x in bcf.args if x isa AbstractDataFrame]) if length(colnames) > 1 || (length(colnames) == 1 && view(_names(crdf.df), crdf.cols) != colnames[1]) wrongnames = setdiff(union(colnames...), intersect(colnames...)) msg = join(wrongnames, ", ", " and ") diff --git a/test/broadcasting.jl b/test/broadcasting.jl index ccb0fb9096..952aa7e1cb 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -1356,7 +1356,7 @@ end df = DataFrame(x1=1:3, x2=4:6) df[!, selector] .= Ref((a=1,b=2)) - @test df == DataFrame(fill((a=1,b=2), 3, 2)) + @test df == DataFrame(fill((a=1, b=2), 3, 2)) @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6) From 34abbf670da1f984f8ad0ac8d10331bdb8041f90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 24 Aug 2019 21:05:34 +0200 Subject: [PATCH 17/30] fixes after the code review --- src/dataframe/dataframe.jl | 33 ++++------------ src/dataframerow/dataframerow.jl | 10 ++--- src/other/broadcasting.jl | 56 ++++++++++++++++++++------ test/broadcasting.jl | 67 ++++++++++++++++++++++++++++++++ test/dataframe.jl | 64 +++++++++++++++--------------- test/indexing.jl | 2 +- 6 files changed, 158 insertions(+), 74 deletions(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 8534438c21..ddd6f79648 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -523,7 +523,7 @@ end for T in (:AbstractVector, :Not, :Colon) @eval function Base.setindex!(df::DataFrame, v::AbstractVector, - row_inds::$(T), + row_inds::$T, col_ind::ColumnIndex) x = df[!, col_ind] try @@ -542,8 +542,8 @@ for T1 in (:AbstractVector, :Not, :Colon), T2 in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) @eval function Base.setindex!(df::DataFrame, new_df::AbstractDataFrame, - row_inds::$(T1), - col_inds::$(T2)) + row_inds::$T1, + col_inds::$T2) idxs = index(df)[col_inds] for (j, col) in enumerate(idxs) df[row_inds, col] = new_df[!, j] @@ -559,7 +559,7 @@ for T in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) @eval function Base.setindex!(df::DataFrame, new_df::AbstractDataFrame, row_inds::typeof(!), - col_inds::$(T)) + col_inds::$T) idxs = index(df)[col_inds] if view(_names(df), idxs) != _names(new_df) throw(ArgumentError("Column names in source and target data frames do not match")) @@ -573,36 +573,19 @@ for T in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) end # df[MultiRowIndex, MultiColumnIndex] = AbstractMatrix -for T1 in (:AbstractVector, :Not, :Colon), +for T1 in (:AbstractVector, :Not, :Colon, :(typeof(!))), T2 in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) @eval function Base.setindex!(df::DataFrame, mx::AbstractMatrix, - row_inds::$(T1), - col_inds::$(T2)) - idxs = index(df)[col_inds] - if size(mx, 2) != length(idxs) - throw(DimensionMismatch("number of selected columns ($(length(idxs))) and number of columns in" * - " matrix ($(size(mx, 2))) do not match")) - end - for (j, col) in enumerate(idxs) - df[row_inds, col] = view(mx, :, j) - end - return df - end -end - -for T in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) - @eval function Base.setindex!(df::DataFrame, - mx::AbstractMatrix, - row_inds::typeof(!), - col_inds::$(T)) + row_inds::$T1, + col_inds::$T2) idxs = index(df)[col_inds] if size(mx, 2) != length(idxs) throw(DimensionMismatch("number of selected columns ($(length(idxs))) and number of columns in" * " matrix ($(size(mx, 2))) do not match")) end for (j, col) in enumerate(idxs) - df[!, col] = mx[:, j] + df[row_inds, col] = row_inds isa typeof(!) ? mx[:, j] : view(mx, :, j) end return df end diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl index c844666752..ac9630e2b8 100644 --- a/src/dataframerow/dataframerow.jl +++ b/src/dataframerow/dataframerow.jl @@ -109,18 +109,18 @@ for T in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) " value contains $(length(v)) elements")) end - if !(v isa AbstractDict || all(((a, b),) -> a == b, zip(view(_names(df), idxs), keys(v)))) - mismatched = findall(view(_names(df), idxs) .!= collect(keys(v))) - throw(ArgumentError("Selected column names do not match the names in assigned value in" * - " positions $(join(mismatched, ", ", " and "))")) - end if v isa AbstractDict for n in view(_names(df), idxs) if !haskey(v, n) throw(ArgumentError("Column :$n not found in source dictionary")) end end + elseif !all(((a, b),) -> a == b, zip(view(_names(df), idxs), keys(v))) + mismatched = findall(view(_names(df), idxs) .!= collect(keys(v))) + throw(ArgumentError("Selected column names do not match the names in assigned value in" * + " positions $(join(mismatched, ", ", " and "))")) end + for (col, val) in pairs(v) df[row_ind, col] = val end diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 8ecc5505fa..e98a417785 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -27,7 +27,7 @@ function copyto_widen!(res::AbstractVector{T}, bc::Base.Broadcast.Broadcasted, newres = similar(Vector{promote_type(S, T)}, length(res)) copyto!(newres, 1, res, 1, i-1) newres[i] = val - return copyto_widen!(newres, bc, i + 1, 2) + return copyto_widen!(newres, bc, i + 1, col) end end return res @@ -50,9 +50,14 @@ function Base.copy(bc::Base.Broadcast.Broadcasted{DataFrameStyle}) colnames = unique([_names(df) for df in bcf.args if df isa AbstractDataFrame]) if length(colnames) != 1 wrongnames = setdiff(union(colnames...), intersect(colnames...)) - msg = join(wrongnames, ", ", " and ") - throw(ArgumentError("Column names in broadcasted data frames must match. " * - "Non matching column names are $msg")) + if isempty(wrongnames) + throw(ArgumentError("Column names in broadcasted data frames " * + "must have the same order")) + else + msg = join(wrongnames, ", ", " and ") + throw(ArgumentError("Column names in broadcasted data frames must match. " * + "Non matching column names are $msg")) + end end nrows = length(axes(bcf)[1]) df = DataFrame() @@ -200,10 +205,16 @@ function Base.copyto!(df::AbstractDataFrame, bc::Base.Broadcast.Broadcasted) bcf = Base.Broadcast.flatten(bc) colnames = unique([_names(x) for x in bcf.args if x isa AbstractDataFrame]) if length(colnames) > 1 || (length(colnames) == 1 && _names(df) != colnames[1]) + push!(colnames, _names(df)) wrongnames = setdiff(union(colnames...), intersect(colnames...)) - msg = join(wrongnames, ", ", " and ") - throw(ArgumentError("Column names in broadcasted data frames must match. " * - "Non matching column names are $msg")) + if isempty(wrongnames) + throw(ArgumentError("Column names in broadcasted data frames " * + "must have the same order")) + else + msg = join(wrongnames, ", ", " and ") + throw(ArgumentError("Column names in broadcasted data frames must match. " * + "Non matching column names are $msg")) + end end bcf′ = Base.Broadcast.preprocess(df, bcf) @@ -229,15 +240,38 @@ function Base.copyto!(crdf::ColReplaceDataFrame, bc::Base.Broadcast.Broadcasted) bcf = Base.Broadcast.flatten(bc) colnames = unique!([_names(x) for x in bcf.args if x isa AbstractDataFrame]) if length(colnames) > 1 || (length(colnames) == 1 && view(_names(crdf.df), crdf.cols) != colnames[1]) + push!(colnames, view(_names(crdf.df), crdf.cols)) wrongnames = setdiff(union(colnames...), intersect(colnames...)) - msg = join(wrongnames, ", ", " and ") - throw(ArgumentError("Column names in broadcasted data frames must match. " * - "Non matching column names are $msg")) + if isempty(wrongnames) + throw(ArgumentError("Column names in broadcasted data frames " * + "must have the same order")) + else + msg = join(wrongnames, ", ", " and ") + throw(ArgumentError("Column names in broadcasted data frames must match. " * + "Non matching column names are $msg")) + end end bcf′ = Base.Broadcast.preprocess(crdf, bcf) + nrows = length(axes(bcf′)[1]) for (i, col) in enumerate(crdf.cols) - crdf.df[!, col] = [bcf′[CartesianIndex(j, i)] for j in axes(crdf.df, 1)] + bcf′_col = getcolbc(bcf′, i) + if bcf′_col isa Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}} + bc_tmp = Base.Broadcast.Broadcasted{T}(bcf′_col.f, bcf′_col.args, ()) + v = Base.Broadcast.materialize(bc_tmp) + col = similar(Vector{typeof(v)}, nrow(crdf.df)) + copyto!(col, bc) + else + if nrows == 0 + col = Any[] + else + v1 = bcf′_col[CartesianIndex(1, i)] + startcol = similar(Vector{typeof(v1)}, nrows) + startcol[1] = v1 + col = copyto_widen!(startcol, bcf′_col, 2, i) + end + end + crdf.df[!, col] = col end crdf.df end diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 952aa7e1cb..88d83c2918 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -1406,10 +1406,77 @@ end "b" "e" "c" "f"]) @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6, x3=1) + df[!, selector] .= "a" + @test df == DataFrame(["a" "a" 1 + "a" "a" 1 + "a" "a" 1]) + @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6, x3=1) + df[!, selector] .= Ref((a=1,b=2)) + @test df[:, 1:2] == DataFrame(fill((a=1, b=2), 3, 2)) + @test df[:, 3] == [1, 1, 1] + @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6, x3=1) + df[!, selector] .= ["a" "b"] + @test df == DataFrame(["a" "b" 1 + "a" "b" 1 + "a" "b" 1]) + @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6, x3=1) + df[!, selector] .= ["a", "b", "c"] + @test df == DataFrame(["a" "a" 1 + "b" "b" 1 + "c" "c" 1]) + @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6, x3=1) + df[!, selector] .= categorical(["a"]) + @test df == DataFrame(["a" "a" 1 + "a" "a" 1 + "a" "a" 1]) + @test df.x1 isa CategoricalVector + @test df.x2 isa CategoricalVector + @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6, x3=1) + df[!, selector] .= DataFrame(["a" "b"]) + @test df == DataFrame(["a" "b" 1 + "a" "b" 1 + "a" "b" 1]) + @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6, x3=1) + df[!, selector] .= DataFrame(["a" "d" + "b" "e" + "c" "f"]) + @test df == DataFrame(["a" "d" 1 + "b" "e" 1 + "c" "f" 1]) + @test df.x1 !== df.x2 + + df = DataFrame(x1=1:3, x2=4:6, x3=1) + df[!, selector] .= ["a" "d" + "b" "e" + "c" "f"] + @test df == DataFrame(["a" "d" 1 + "b" "e" 1 + "c" "f" 1]) + @test df.x1 !== df.x2 end df = DataFrame(x1=1:3, x2=4:6) @test_throws ArgumentError df[!, [:x1, :x3]] .= "a" end +@testset "broadcasting over heterogenous columns" begin + df = DataFrame(x = [1, 1.0, big(1), "1"]) + f_identity(x) = x + @test df == f_identity.(df) +end + end # module diff --git a/test/dataframe.jl b/test/dataframe.jl index 7d0d0b5f3b..6d518638bb 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -18,8 +18,8 @@ const ≇ = !isequal end @testset "copying" begin - df = DataFrame(a = Union{Int, Missing}[2, 3], - b = Union{DataFrame, Missing}[DataFrame(c = 1), DataFrame(d = 2)]) + df = DataFrame(a=Union{Int, Missing}[2, 3], + b=Union{DataFrame, Missing}[DataFrame(c = 1), DataFrame(d = 2)]) dfc = copy(df) dfdc = deepcopy(df) @@ -38,12 +38,12 @@ end end @testset "similar / missings" begin - df = DataFrame(a = Union{Int, Missing}[1], - b = Union{String, Missing}["b"], - c = CategoricalArray{Union{Float64, Missing}}([3.3])) - missingdf = DataFrame(a = missings(Int, 2), - b = missings(String, 2), - c = CategoricalArray{Union{Float64, Missing}}(undef, 2)) + df = DataFrame(a=Union{Int, Missing}[1], + b=Union{String, Missing}["b"], + c=CategoricalArray{Union{Float64, Missing}}([3.3])) + missingdf = DataFrame(a=missings(Int, 2), + b=missings(String, 2), + c=CategoricalArray{Union{Float64, Missing}}(undef, 2)) # https://github.com/JuliaData/Missings.jl/issues/66 # @test missingdf ≅ similar(df, 2) @test typeof.(eachcol(similar(df, 2))) == typeof.(eachcol(missingdf)) @@ -147,97 +147,97 @@ end buf = IOBuffer() sl = SimpleLogger(buf) - df=DataFrame(first=[1,2,3], second=["apple","orange","pear"]) + df = DataFrame(first=[1,2,3], second=["apple","orange","pear"]) - dfb= DataFrame(first=[1,2], second=["apple","orange"]) - dfc= DataFrame(first=[1,2], second=["apple","orange"]) + dfb = DataFrame(first=[1,2], second=["apple","orange"]) + dfc = DataFrame(first=[1,2], second=["apple","orange"]) push!(dfb, Any[3,"pear"]) @test df == dfb - dfb= DataFrame(first=[1,2], second=["apple","orange"]) + dfb = DataFrame(first=[1,2], second=["apple","orange"]) push!(dfb, (3,"pear")) @test df == dfb - dfb= DataFrame(first=[1,2], second=["apple","orange"]) + dfb = DataFrame(first=[1,2], second=["apple","orange"]) with_logger(sl) do @test_throws InexactError push!(dfb, (33.33,"pear")) end @test dfc == dfb @test occursin("Error adding value to column :first", String(take!(buf))) - dfb= DataFrame(first=[1,2], second=["apple","orange"]) + dfb = DataFrame(first=[1,2], second=["apple","orange"]) @test_throws ArgumentError push!(dfb, (1,"2",3)) @test dfc == dfb - dfb= DataFrame(first=[1,2], second=["apple","orange"]) + dfb = DataFrame(first=[1,2], second=["apple","orange"]) with_logger(sl) do @test_throws MethodError push!(dfb, ("coconut",22)) end @test dfc == dfb @test occursin("Error adding value to column :first", String(take!(buf))) - dfb= DataFrame(first=[1,2], second=["apple","orange"]) + dfb = DataFrame(first=[1,2], second=["apple","orange"]) with_logger(sl) do @test_throws MethodError push!(dfb, (11,22)) end @test dfc == dfb @test occursin("Error adding value to column :second", String(take!(buf))) - dfb= DataFrame(first=[1,2], second=["apple","orange"]) + dfb = DataFrame(first=[1,2], second=["apple","orange"]) push!(dfb, Dict(:first=>3, :second=>"pear")) @test df == dfb - df=DataFrame(first=[1,2,3], second=["apple","orange","banana"]) - dfb= DataFrame(first=[1,2], second=["apple","orange"]) + df = DataFrame(first=[1,2,3], second=["apple","orange","banana"]) + dfb = DataFrame(first=[1,2], second=["apple","orange"]) push!(dfb, Dict(:first=>3, :second=>"banana")) @test df == dfb - dfb= DataFrame(first=[1,2], second=["apple","orange"]) + dfb = DataFrame(first=[1,2], second=["apple","orange"]) push!(dfb, (first=3, second="banana")) @test df == dfb - dfb= DataFrame(first=[1,2], second=["apple","orange"]) + dfb = DataFrame(first=[1,2], second=["apple","orange"]) push!(dfb, (second="banana", first=3)) @test df == dfb - df0= DataFrame(first=[1,2], second=["apple","orange"]) - dfb= DataFrame(first=[1,2], second=["apple","orange"]) + df0 = DataFrame(first=[1,2], second=["apple","orange"]) + dfb = DataFrame(first=[1,2], second=["apple","orange"]) with_logger(sl) do @test_throws MethodError push!(dfb, (second=3, first=3)) end @test df0 == dfb @test occursin("Error adding value to column :second", String(take!(buf))) - dfb= DataFrame(first=[1,2], second=["apple","orange"]) + dfb = DataFrame(first=[1,2], second=["apple","orange"]) push!(dfb, (second="banana", first=3)) @test df == dfb - df0= DataFrame(first=[1,2], second=["apple","orange"]) - dfb= DataFrame(first=[1,2], second=["apple","orange"]) + df0 = DataFrame(first=[1,2], second=["apple","orange"]) + dfb = DataFrame(first=[1,2], second=["apple","orange"]) with_logger(sl) do @test_throws MethodError push!(dfb, Dict(:first=>true, :second=>false)) end @test df0 == dfb @test occursin("Error adding value to column :second", String(take!(buf))) - df0= DataFrame(first=[1,2], second=["apple","orange"]) - dfb= DataFrame(first=[1,2], second=["apple","orange"]) + df0 = DataFrame(first=[1,2], second=["apple","orange"]) + dfb = DataFrame(first=[1,2], second=["apple","orange"]) with_logger(sl) do @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>"stuff")) end @test df0 == dfb @test occursin("Error adding value to column :first", String(take!(buf))) - df0=DataFrame(first=[1,2,3], second=["apple","orange","pear"]) - dfb=DataFrame(first=[1,2,3], second=["apple","orange","pear"]) + df0 = DataFrame(first=[1,2,3], second=["apple","orange","pear"]) + dfb = DataFrame(first=[1,2,3], second=["apple","orange","pear"]) with_logger(sl) do @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>1)) end @test df0 == dfb @test occursin("Error adding value to column :first", String(take!(buf))) - df0=DataFrame(first=["1","2","3"], second=["apple","orange","pear"]) - dfb=DataFrame(first=["1","2","3"], second=["apple","orange","pear"]) + df0 = DataFrame(first=["1","2","3"], second=["apple","orange","pear"]) + dfb = DataFrame(first=["1","2","3"], second=["apple","orange","pear"]) with_logger(sl) do @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>1)) end diff --git a/test/indexing.jl b/test/indexing.jl index c73981fc6f..5a66c6cc02 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -1310,7 +1310,7 @@ end end end -@testset "old setindex! tests adjusted to new rules" begin +@testset "additional setindex! tests" begin df = DataFrame(reshape(1:12, 4, :)) @test_throws MethodError df[1, :] = df[1:1, :] From b4d44c393afaf607a619b0c2fb91b0decc8467c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 24 Aug 2019 21:10:23 +0200 Subject: [PATCH 18/30] documentation update --- docs/src/lib/indexing.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index 5ec98baa69..31215d3030 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -115,7 +115,7 @@ The following list specifies the behavior of `setindex!` operations depending on In particular a description explicitly mentions if the assignment is *in-place*. Note that if a `setindex!` operation throws an error the target data frame may be partially changed -so it is unsafe to use it afterwards. +so it is unsafe to use it afterwards (the column length correctness will be preserved). `setindex!` on `DataFrame`: * `df[row, col] = v` -> set value of `col` in row `row` to `v` in-place; @@ -166,7 +166,7 @@ The following broadcasting rules apply to `AbstractDataFrame` objects: * If multiple `AbstractDataFrame` objects take part in broadcasting then they have to have identical column names. Note that if broadcasting assignment operation throws an error the target data frame may be partially changed -so it is unsafe to use it afterwards. +so it is unsafe to use it afterwards (the column length correctness will be preserved). Broadcasting `DataFrameRow` is currently not allowed (which is consistent with `NamedTuple`). From 3ae4924ab29d68ddde1c4e86e954fed567aaeeb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 24 Aug 2019 22:03:10 +0200 Subject: [PATCH 19/30] fix type inference problem --- src/other/broadcasting.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index e98a417785..e8cb4971bc 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -236,6 +236,9 @@ function Base.copyto!(df::AbstractDataFrame, bc::Base.Broadcast.Broadcasted{<:Ba end end +create_bc_tmp(bcf′_col::Base.Broadcast.Broadcasted{T}) = + Base.Broadcast.Broadcasted{T}(bcf′_col.f, bcf′_col.args, ()) + function Base.copyto!(crdf::ColReplaceDataFrame, bc::Base.Broadcast.Broadcasted) bcf = Base.Broadcast.flatten(bc) colnames = unique!([_names(x) for x in bcf.args if x isa AbstractDataFrame]) @@ -257,7 +260,7 @@ function Base.copyto!(crdf::ColReplaceDataFrame, bc::Base.Broadcast.Broadcasted) for (i, col) in enumerate(crdf.cols) bcf′_col = getcolbc(bcf′, i) if bcf′_col isa Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}} - bc_tmp = Base.Broadcast.Broadcasted{T}(bcf′_col.f, bcf′_col.args, ()) + bc_tmp = create_bc_tmp(bcf′_col) v = Base.Broadcast.materialize(bc_tmp) col = similar(Vector{typeof(v)}, nrow(crdf.df)) copyto!(col, bc) From bf8eccb7a61f59eaea2a622854febaf339fdec7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 25 Aug 2019 00:18:53 +0200 Subject: [PATCH 20/30] fix syntax error --- src/other/broadcasting.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index e8cb4971bc..b3d0b014b1 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -236,7 +236,7 @@ function Base.copyto!(df::AbstractDataFrame, bc::Base.Broadcast.Broadcasted{<:Ba end end -create_bc_tmp(bcf′_col::Base.Broadcast.Broadcasted{T}) = +create_bc_tmp(bcf′_col::Base.Broadcast.Broadcasted{T}) where {T} = Base.Broadcast.Broadcasted{T}(bcf′_col.f, bcf′_col.args, ()) function Base.copyto!(crdf::ColReplaceDataFrame, bc::Base.Broadcast.Broadcasted) From ee62210433b116a5b60a9e19c8fef15f9dfca5c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 25 Aug 2019 00:33:59 +0200 Subject: [PATCH 21/30] fix bug in broadcasting code --- src/other/broadcasting.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index b3d0b014b1..331be47eb4 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -257,24 +257,24 @@ function Base.copyto!(crdf::ColReplaceDataFrame, bc::Base.Broadcast.Broadcasted) bcf′ = Base.Broadcast.preprocess(crdf, bcf) nrows = length(axes(bcf′)[1]) - for (i, col) in enumerate(crdf.cols) + for (i, col_idx) in enumerate(crdf.cols) bcf′_col = getcolbc(bcf′, i) if bcf′_col isa Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}} bc_tmp = create_bc_tmp(bcf′_col) v = Base.Broadcast.materialize(bc_tmp) - col = similar(Vector{typeof(v)}, nrow(crdf.df)) - copyto!(col, bc) + newcol = similar(Vector{typeof(v)}, nrow(crdf.df)) + copyto!(newcol, bc) else if nrows == 0 - col = Any[] + newcol = Any[] else v1 = bcf′_col[CartesianIndex(1, i)] startcol = similar(Vector{typeof(v1)}, nrows) startcol[1] = v1 - col = copyto_widen!(startcol, bcf′_col, 2, i) + newcol = copyto_widen!(startcol, bcf′_col, 2, i) end end - crdf.df[!, col] = col + crdf.df[!, col_idx] = newcol end crdf.df end From 6c8201ed2ec60270a4248d40c7e7826940593fc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 25 Aug 2019 02:31:36 +0200 Subject: [PATCH 22/30] fix tests --- test/broadcasting.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 88d83c2918..06d816f5a3 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -1348,7 +1348,7 @@ end end @testset "broadcasting into df[!, cols]" begin - for selector in [1:2, Between(:x1, :x2), Not(r"xx"), [:x1, :x2]] + for selector in [1:2, Between(:x1, :x2), Not(r"x3"), [:x1, :x2]] df = DataFrame(x1=1:3, x2=4:6) df[!, selector] .= "a" @test df == DataFrame(fill("a", 3, 2)) From 8c3ed7224bbfaab7360b69e6caf7e597afa8ee7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 1 Sep 2019 18:42:35 +0200 Subject: [PATCH 23/30] Apply suggestions from code review Co-Authored-By: Milan Bouchet-Valat --- src/dataframe/dataframe.jl | 2 +- src/dataframerow/dataframerow.jl | 2 +- src/other/broadcasting.jl | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index ddd6f79648..4d58e37e72 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -585,7 +585,7 @@ for T1 in (:AbstractVector, :Not, :Colon, :(typeof(!))), " matrix ($(size(mx, 2))) do not match")) end for (j, col) in enumerate(idxs) - df[row_inds, col] = row_inds isa typeof(!) ? mx[:, j] : view(mx, :, j) + df[row_inds, col] = (row_inds === !) ? mx[:, j] : view(mx, :, j) end return df end diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl index ac9630e2b8..6d2f92a917 100644 --- a/src/dataframerow/dataframerow.jl +++ b/src/dataframerow/dataframerow.jl @@ -106,7 +106,7 @@ for T in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) idxs = index(df)[col_inds] if length(v) != length(idxs) throw(DimensionMismatch("$(length(idxs)) columns were selected but the assigned" * - " value contains $(length(v)) elements")) + " collection contains $(length(v)) elements")) end if v isa AbstractDict diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 331be47eb4..a8c8607a14 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -47,7 +47,7 @@ function Base.copy(bc::Base.Broadcast.Broadcasted{DataFrameStyle}) throw(DimensionMismatch("cannot broadcast a data frame into $ndim dimensions")) end bcf = Base.Broadcast.flatten(bc) - colnames = unique([_names(df) for df in bcf.args if df isa AbstractDataFrame]) + colnames = unique!([_names(df) for df in bcf.args if df isa AbstractDataFrame]) if length(colnames) != 1 wrongnames = setdiff(union(colnames...), intersect(colnames...)) if isempty(wrongnames) @@ -203,7 +203,7 @@ end function Base.copyto!(df::AbstractDataFrame, bc::Base.Broadcast.Broadcasted) bcf = Base.Broadcast.flatten(bc) - colnames = unique([_names(x) for x in bcf.args if x isa AbstractDataFrame]) + colnames = unique!([_names(x) for x in bcf.args if x isa AbstractDataFrame]) if length(colnames) > 1 || (length(colnames) == 1 && _names(df) != colnames[1]) push!(colnames, _names(df)) wrongnames = setdiff(union(colnames...), intersect(colnames...)) From 867e34897a0595315f12c074986175b2bc4bd11e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 2 Sep 2019 14:47:53 +0200 Subject: [PATCH 24/30] stricter DataFrameRow setindex! rules --- docs/src/lib/indexing.md | 4 +- src/dataframe/dataframe.jl | 2 +- test/indexing.jl | 121 ++++++++++++++++++++++++++++++------- 3 files changed, 103 insertions(+), 24 deletions(-) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index 31215d3030..e4eb1294e6 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -151,7 +151,9 @@ Note that `sdf[!, col] = v`, `sdf[!, cols] = v` and `sdf.col = v` are not allowe * `dfr[col] = v` -> set value of `col` in row `row` to `v` in-place; equivalent to `dfr.col = v` if `col` is a valid identifier; * `dfr[cols] = v` -> set values of entries in columns `cols` in `dfr` by elements of `v` in place; - `v` can be: 1) an iterable, in which case it must have a number of elements equal to `length(dfr)`, + `v` can be: + 1) a `Tuple`, an `AbstractArray` or a `Base.Generator`, + in which cases it must have a number of elements equal to `length(dfr)`, 2) an `AbstractDict`, in which case column names must match, 3) a `NamedTuple` or `DataFrameRow`, in which case column names and order must match; diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 4d58e37e72..c4043b8ccd 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -504,7 +504,7 @@ end for T in (:AbstractVector, :Regex, :Not, :Between, :All, :Colon) @eval function Base.setindex!(df::DataFrame, - v, + v::Union{Tuple, AbstractArray, Base.Generator}, row_ind::Integer, col_inds::$T) idxs = index(df)[col_inds] diff --git a/test/indexing.jl b/test/indexing.jl index 5a66c6cc02..642a2d9691 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -1175,9 +1175,48 @@ end @test df == DataFrame(a=[98, 2, 3], b=4:6, c=7:9) end - # `dfr[cols] = v` -> set values of entries in columns `cols` in `dfr` by elements of `v` in place; - # `v` can be an `AbstractVector` or `v` can be a `NamedTuple` or `DataFrameRow` - # when column names must match; + # * `dfr[cols] = v` -> set values of entries in columns `cols` in `dfr` by elements of `v` in place; + # `v` can be: + # 1) a `Tuple`, an `AbstractArray` or a `Base.Generator`, + # in which cases it must have a number of elements equal to `length(dfr)`, + # 2) an `AbstractDict`, in which case column names must match, + # 3) a `NamedTuple` or `DataFrameRow`, in which case column names and order must match; + + df = DataFrame(a=1,b=2) + dfr = df[1, :] + dfr[:] = (10, 11) + @test df == DataFrame(a=10,b=11) + df = DataFrame(a=1,b=2) + dfr = df[1, :] + @test_throws DimensionMismatch dfr[:] = (10, 11, 12) + @test df == DataFrame(a=1,b=2) + + df = DataFrame(a=1,b=2) + dfr = df[1, :] + dfr[:] = [10, 11] + @test df == DataFrame(a=10,b=11) + df = DataFrame(a=1,b=2) + dfr = df[1, :] + @test_throws DimensionMismatch dfr[:] = [10, 11, 12] + @test df == DataFrame(a=1,b=2) + + df = DataFrame(a=1,b=2) + dfr = df[1, :] + dfr[:] = [10 11] + @test df == DataFrame(a=10,b=11) + df = DataFrame(a=1,b=2) + dfr = df[1, :] + @test_throws DimensionMismatch dfr[:] = [10 11 12] + @test df == DataFrame(a=1,b=2) + + df = DataFrame(a=1,b=2) + dfr = df[1, :] + dfr[:] = (i for i in 10:11, _ in 1:1, _ in 1:1) + @test df == DataFrame(a=10,b=11) + df = DataFrame(a=1,b=2) + dfr = df[1, :] + @test_throws DimensionMismatch dfr[:] = (i for i in 10:12, _ in 1:1, _ in 1:1) + @test df == DataFrame(a=1,b=2) df = DataFrame(a=1,b=2) dfr = df[1, :] @@ -1226,52 +1265,90 @@ end @test_throws DimensionMismatch dfr[:] = DataFrame(a=10, b=11, c=12)[1, :] @test df == DataFrame(a=1,b=2) - df = DataFrame(a=1,b=2, c=3) + df = DataFrame(a=1, b=2, c=3) + dfr = df[1, :] + dfr[Not(3)] = (10, 11) + @test df == DataFrame(a=10,b=11, c=3) + df = DataFrame(a=1, b=2, c=3) + dfr = df[1, :] + @test_throws DimensionMismatch dfr[Not(3)] = (10, 11, 12) + @test df == DataFrame(a=1, b=2, c=3) + + df = DataFrame(a=1, b=2, c=3) + dfr = df[1, :] + dfr[Not(3)] = [10, 11] + @test df == DataFrame(a=10,b=11, c=3) + df = DataFrame(a=1, b=2, c=3) + dfr = df[1, :] + @test_throws DimensionMismatch dfr[Not(3)] = [10, 11, 12] + @test df == DataFrame(a=1, b=2, c=3) + + df = DataFrame(a=1, b=2, c=3) + dfr = df[1, :] + dfr[Not(3)] = [10 11] + @test df == DataFrame(a=10,b=11, c=3) + df = DataFrame(a=1, b=2, c=3) + dfr = df[1, :] + @test_throws DimensionMismatch dfr[Not(3)] = [10 11 12] + @test df == DataFrame(a=1, b=2, c=3) + + df = DataFrame(a=1, b=2, c=3) + dfr = df[1, :] + dfr[Not(3)] = (i for i in 11:12, _ in 1:1, _ in 1:1) + @test df == DataFrame(a=10,b=11, c=3) + df = DataFrame(a=1, b=2, c=3) + dfr = df[1, :] + @test_throws DimensionMismatch dfr[Not(3)] = (i for i in 11:13, _ in 1:1, _ in 1:1) + @test df == DataFrame(a=1, b=2, c=3) + + df = DataFrame(a=1, b=2, c=3) dfr = df[1, :] dfr[Not(3)] = Dict(:a=>10, :b=>11) @test df == DataFrame(a=10,b=11, c=3) - df = DataFrame(a=1,b=2, c=3) + df = DataFrame(a=1, b=2, c=3) dfr = df[1, :] @test_throws ArgumentError dfr[Not(3)] = Dict(:a=>10, :c=>11) - @test df == DataFrame(a=1,b=2, c=3) - df = DataFrame(a=1,b=2, c=3) + @test df == DataFrame(a=1, b=2, c=3) + df = DataFrame(a=1, b=2, c=3) dfr = df[1, :] @test_throws DimensionMismatch dfr[Not(3)] = Dict(:a=>10, :b=>11, :c=>12) - @test df == DataFrame(a=1,b=2, c=3) + @test df == DataFrame(a=1, b=2, c=3) - df = DataFrame(a=1,b=2, c=3) + df = DataFrame(a=1, b=2, c=3) dfr = df[1, :] dfr[Not(3)] = (a=10, b=11) @test df == DataFrame(a=10,b=11, c=3) - df = DataFrame(a=1,b=2, c=3) + df = DataFrame(a=1, b=2, c=3) dfr = df[1, :] @test_throws ArgumentError dfr[Not(3)] = (a=10, c=11) - @test df == DataFrame(a=1,b=2, c=3) - df = DataFrame(a=1,b=2, c=3) + @test df == DataFrame(a=1, b=2, c=3) + df = DataFrame(a=1, b=2, c=3) dfr = df[1, :] @test_throws ArgumentError dfr[Not(3)] = (b=10, a=11) - @test df == DataFrame(a=1,b=2, c=3) - df = DataFrame(a=1,b=2, c=3) + @test df == DataFrame(a=1, b=2, c=3) + df = DataFrame(a=1, b=2, c=3) dfr = df[1, :] @test_throws DimensionMismatch dfr[Not(3)] = (a=10, b=11, c=12) - @test df == DataFrame(a=1,b=2, c=3) + @test df == DataFrame(a=1, b=2, c=3) - df = DataFrame(a=1,b=2, c=3) + df = DataFrame(a=1, b=2, c=3) dfr = df[1, :] dfr[Not(3)] = DataFrame(a=10, b=11)[1, :] @test df == DataFrame(a=10,b=11, c=3) - df = DataFrame(a=1,b=2, c=3) + df = DataFrame(a=1, b=2, c=3) dfr = df[1, :] @test_throws ArgumentError dfr[Not(3)] = DataFrame(a=10, c=11)[1, :] - @test df == DataFrame(a=1,b=2, c=3) - df = DataFrame(a=1,b=2, c=3) + @test df == DataFrame(a=1, b=2, c=3) + df = DataFrame(a=1, b=2, c=3) dfr = df[1, :] @test_throws ArgumentError dfr[Not(3)] = DataFrame(b=10, a=11)[1, :] - @test df == DataFrame(a=1,b=2, c=3) - df = DataFrame(a=1,b=2, c=3) + @test df == DataFrame(a=1, b=2, c=3) + df = DataFrame(a=1, b=2, c=3) dfr = df[1, :] @test_throws DimensionMismatch dfr[Not(3)] = DataFrame(a=10, b=11, c=12)[1, :] - @test df == DataFrame(a=1,b=2, c=3) + @test df == DataFrame(a=1, b=2, c=3) + + @test_throws MethodError dfr[:] = "ab" end @testset "setindex! with ! or : and multiple cols" begin From 35c88eef0159c14a396a2b0c3067d3add3d0fafa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 2 Sep 2019 15:53:57 +0200 Subject: [PATCH 25/30] fix more tests --- test/indexing.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/indexing.jl b/test/indexing.jl index 642a2d9691..aab11cde8d 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -1024,8 +1024,8 @@ end @test_throws DimensionMismatch df[1, :] = [1, 2, 3] @test_throws DimensionMismatch dfr[:] = [1, 2, 3] - @test_throws DimensionMismatch df[1, 1:2] = 3 - @test_throws DimensionMismatch dfr[:] = 3 + @test_throws MethodError df[1, 1:2] = 3 + @test_throws MethodError dfr[:] = 3 # numbers are iterable dfr[1:1] = 100 From 44a5867df0477781e4ccd1d709908dc3937ee842 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 2 Sep 2019 16:22:40 +0200 Subject: [PATCH 26/30] fix more tests --- test/indexing.jl | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/test/indexing.jl b/test/indexing.jl index aab11cde8d..dd384c9371 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -1026,18 +1026,10 @@ end @test_throws DimensionMismatch dfr[:] = [1, 2, 3] @test_throws MethodError df[1, 1:2] = 3 @test_throws MethodError dfr[:] = 3 - - # numbers are iterable - dfr[1:1] = 100 - @test df == DataFrame(a=100, b=11) - df[1, 1:1] = 1000 - @test df == DataFrame(a=1000, b=11) - - # so are strings - dfr[1:1] = "d" - @test df == DataFrame(a=100, b=11) - df[1, 1:1] = "e" - @test df == DataFrame(a=101, b=11) + @test_throws MethodError dfr[1:1] = 100 + @test_throws MethodError df[1, 1:1] = 1000 + @test_throws MethodError dfr[1:1] = "d" + @test_throws MethodError df[1, 1:1] = "e" df = view(DataFrame(a=1,b=2), :, :) df[1, :] = Dict(:a=>10, :b=>11) From b3b0dddcf6e14ae8f2e396a194c248ea5e53ca6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 2 Sep 2019 16:52:31 +0200 Subject: [PATCH 27/30] fix bad test --- test/indexing.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/indexing.jl b/test/indexing.jl index dd384c9371..41502d2442 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -1286,7 +1286,7 @@ end df = DataFrame(a=1, b=2, c=3) dfr = df[1, :] - dfr[Not(3)] = (i for i in 11:12, _ in 1:1, _ in 1:1) + dfr[Not(3)] = (i for i in 10:11, _ in 1:1, _ in 1:1) @test df == DataFrame(a=10,b=11, c=3) df = DataFrame(a=1, b=2, c=3) dfr = df[1, :] From e476e081047ca4d0b3d1e70b6217c3d185bacdca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 2 Sep 2019 19:29:43 +0200 Subject: [PATCH 28/30] setting 1-row DataFrame is deprecated but still works --- test/deprecated.jl | 3 +++ test/indexing.jl | 4 ---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/test/deprecated.jl b/test/deprecated.jl index 35781ab253..b507f36252 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -351,6 +351,9 @@ end end @testset "old setindex! tests" begin + df = DataFrame(reshape(1:12, 4, :)) + df[1, :] = df[1:1, :] + df = DataFrame(reshape(1:12, 4, :)) # Scalar broadcasting assignment of rows diff --git a/test/indexing.jl b/test/indexing.jl index 41502d2442..cb0d142b9c 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -1380,10 +1380,6 @@ end end @testset "additional setindex! tests" begin - df = DataFrame(reshape(1:12, 4, :)) - - @test_throws MethodError df[1, :] = df[1:1, :] - df[1:2, :] = df[3:4, :] @test df == DataFrame([3 7 11 4 8 12 From eb4340d6dbb7eed597c57e9288d52823da7d376c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 2 Sep 2019 20:31:02 +0200 Subject: [PATCH 29/30] add missing line --- test/indexing.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/indexing.jl b/test/indexing.jl index cb0d142b9c..d91fe53fdf 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -1380,6 +1380,7 @@ end end @testset "additional setindex! tests" begin + df = DataFrame(reshape(1:12, 4, :)) df[1:2, :] = df[3:4, :] @test df == DataFrame([3 7 11 4 8 12 From 99f669a2ff98b66177591e8f2fc85def4f7d6103 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 2 Sep 2019 22:40:43 +0200 Subject: [PATCH 30/30] fix another test --- test/indexing.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/indexing.jl b/test/indexing.jl index d91fe53fdf..7e640ba85b 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -1393,7 +1393,7 @@ end 4 8 12 4 8 12]) - @test_throws DimensionMismatch df[1, :] = 1 + @test_throws MethodError df[1, :] = 1 df[:, 2] = ones(4) @test df == DataFrame([4 1 12