From d5f8bfe92ec2e1107a5379fc3e6d6dfdd2ce144a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 24 Jan 2023 23:11:15 +0100 Subject: [PATCH] correctly index into a SubDataFrame with no columns --- NEWS.md | 3 +++ src/subdataframe/subdataframe.jl | 36 ++++++++++++++++++---------- test/subdataframe.jl | 41 ++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 13 deletions(-) diff --git a/NEWS.md b/NEWS.md index f268eccb2a..c1cc03dbaa 100644 --- a/NEWS.md +++ b/NEWS.md @@ -28,6 +28,9 @@ * fixed error in fast aggregation in `sum` and `mean` of columns only having `missing` values ([#3268](https://github.com/JuliaData/DataFrames.jl/pull/3268)) +* fixed error in indexing of `SubDataFrame` that has no columns selected from + its parent + ([#3273](https://github.com/JuliaData/DataFrames.jl/pull/3273)) # DataFrames.jl v1.4.4 Patch Release Notes diff --git a/src/subdataframe/subdataframe.jl b/src/subdataframe/subdataframe.jl index 8ba7538084..3d18fc8e9c 100644 --- a/src/subdataframe/subdataframe.jl +++ b/src/subdataframe/subdataframe.jl @@ -110,16 +110,23 @@ Base.@propagate_inbounds function SubDataFrame(parent::DataFrame, rows::Abstract return SubDataFrame(parent, convert(Vector{Int}, rows), cols) end -Base.@propagate_inbounds SubDataFrame(sdf::SubDataFrame, rowind, cols) = - SubDataFrame(parent(sdf), rows(sdf)[rowind], parentcols(index(sdf), cols)) +Base.@propagate_inbounds function SubDataFrame(sdf::SubDataFrame, rowind, cols) + new_rows = ncol(sdf) == 0 ? (Int[])[rowind] : rows(sdf)[rowind] + return SubDataFrame(parent(sdf), new_rows, parentcols(index(sdf), cols)) +end + Base.@propagate_inbounds SubDataFrame(sdf::SubDataFrame, rowind::Bool, cols) = throw(ArgumentError("invalid row index of type Bool")) -Base.@propagate_inbounds SubDataFrame(sdf::SubDataFrame, rowind, ::Colon) = + +Base.@propagate_inbounds function SubDataFrame(sdf::SubDataFrame, rowind, ::Colon) + new_rows = ncol(sdf) == 0 ? (Int[])[rowind] : rows(sdf)[rowind] if index(sdf) isa Index # sdf was created using : as row selector - SubDataFrame(parent(sdf), rows(sdf)[rowind], :) + return SubDataFrame(parent(sdf), new_rows, :) else - SubDataFrame(parent(sdf), rows(sdf)[rowind], parentcols(index(sdf), :)) + return SubDataFrame(parent(sdf), new_rows, parentcols(index(sdf), :)) end +end + Base.@propagate_inbounds SubDataFrame(sdf::SubDataFrame, rowind::Bool, ::Colon) = throw(ArgumentError("invalid row index of type Bool")) Base.@propagate_inbounds SubDataFrame(sdf::SubDataFrame, ::Colon, cols) = @@ -157,27 +164,30 @@ index(sdf::SubDataFrame) = getfield(sdf, :colindex) nrow(sdf::SubDataFrame) = ncol(sdf) > 0 ? length(rows(sdf))::Int : 0 Base.@propagate_inbounds Base.getindex(sdf::SubDataFrame, rowind::Integer, colind::ColumnIndex) = - parent(sdf)[rows(sdf)[rowind], parentcols(index(sdf), colind)] + parent(sdf)[!, parentcols(index(sdf), colind)][rows(sdf)[rowind]] Base.@propagate_inbounds Base.getindex(sdf::SubDataFrame, rowind::Bool, colind::ColumnIndex) = throw(ArgumentError("invalid row index of type Bool")) - Base.@propagate_inbounds Base.getindex(sdf::SubDataFrame, rowinds::Union{AbstractVector, Not}, colind::ColumnIndex) = - parent(sdf)[rows(sdf)[rowinds], parentcols(index(sdf), colind)] + parent(sdf)[!, parentcols(index(sdf), colind)][rows(sdf)[rowinds]] Base.@propagate_inbounds Base.getindex(sdf::SubDataFrame, ::Colon, colind::ColumnIndex) = parent(sdf)[rows(sdf), parentcols(index(sdf), colind)] Base.@propagate_inbounds Base.getindex(sdf::SubDataFrame, ::typeof(!), colind::ColumnIndex) = view(parent(sdf), rows(sdf), parentcols(index(sdf), colind)) -Base.@propagate_inbounds Base.getindex(sdf::SubDataFrame, rowinds::Union{AbstractVector, Not}, - colinds::MultiColumnIndex) = - parent(sdf)[rows(sdf)[rowinds], parentcols(index(sdf), colinds)] +Base.@propagate_inbounds function Base.getindex(sdf::SubDataFrame, + rowinds::Union{AbstractVector, Not}, + colinds::MultiColumnIndex) + new_rows = ncol(sdf) == 0 ? (Int[])[rowinds] : rows(sdf)[rowinds] + return parent(sdf)[new_rows, parentcols(index(sdf), colinds)] +end + Base.@propagate_inbounds Base.getindex(sdf::SubDataFrame, ::Colon, colinds::MultiColumnIndex) = parent(sdf)[rows(sdf), parentcols(index(sdf), colinds)] -Base.@propagate_inbounds Base.getindex(df::SubDataFrame, row_ind::typeof(!), +Base.@propagate_inbounds Base.getindex(sdf::SubDataFrame, row_ind::typeof(!), col_inds::MultiColumnIndex) = - select(df, index(df)[col_inds], copycols=false) + select(sdf, index(sdf)[col_inds], copycols=false) Base.@propagate_inbounds function Base.setindex!(sdf::SubDataFrame, val::Any, idx::CartesianIndex{2}) return setindex!(sdf, val, idx[1], idx[2]) diff --git a/test/subdataframe.jl b/test/subdataframe.jl index 74b9bfd0c1..adb5d7882b 100644 --- a/test/subdataframe.jl +++ b/test/subdataframe.jl @@ -307,4 +307,45 @@ end @test_throws ArgumentError SubDataFrame(sdf, DataFrames.index(sdf), [1]) end +@testset "subdataframe with no columns" begin + df = DataFrame(a=1:3, b=4:6) + dfv = @view df[[3, 1], 2:1] + @test nrow(SubDataFrame(dfv, Int[], :)) == 0 + @test nrow(SubDataFrame(dfv, Bool[], :)) == 0 + @test nrow(SubDataFrame(dfv, :, :)) == 0 + @test nrow(SubDataFrame(dfv, Not(:), :)) == 0 + @test nrow(SubDataFrame(dfv, Int[], [])) == 0 + @test nrow(SubDataFrame(dfv, Bool[], [])) == 0 + @test nrow(SubDataFrame(dfv, :, [])) == 0 + @test nrow(SubDataFrame(dfv, Not(:), [])) == 0 + @test_throws BoundsError SubDataFrame(dfv, [1, 2], :) + + @test nrow(view(dfv, Int[], :)) == 0 + @test nrow(view(dfv, Bool[], :)) == 0 + @test nrow(view(dfv, :, :)) == 0 + @test nrow(view(dfv, !, :)) == 0 + @test nrow(view(dfv, Not(:), :)) == 0 + @test nrow(view(dfv, Int[], [])) == 0 + @test nrow(view(dfv, Bool[], [])) == 0 + @test nrow(view(dfv, :, [])) == 0 + @test nrow(view(dfv, !, [])) == 0 + @test nrow(view(dfv, Not(:), [])) == 0 + @test_throws BoundsError view(dfv, [1, 2], :) + + @test_throws BoundsError dfv[1, 1] + @test_throws BoundsError dfv[[1, 2], 1] + + @test nrow(getindex(dfv, Int[], :)) == 0 + @test nrow(getindex(dfv, Bool[], :)) == 0 + @test nrow(getindex(dfv, :, :)) == 0 + @test nrow(getindex(dfv, !, :)) == 0 + @test nrow(getindex(dfv, Not(:), :)) == 0 + @test nrow(getindex(dfv, Int[], [])) == 0 + @test nrow(getindex(dfv, Bool[], [])) == 0 + @test nrow(getindex(dfv, :, [])) == 0 + @test nrow(getindex(dfv, !, [])) == 0 + @test nrow(getindex(dfv, Not(:), [])) == 0 + @test_throws BoundsError getindex(dfv, [1, 2], :) +end + end # module