From bc66076695cad2d568c8891a7e65afd7014c978a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 29 Mar 2022 12:23:22 +0200 Subject: [PATCH 01/10] add expandgrid --- docs/src/lib/functions.md | 1 + src/DataFrames.jl | 1 + src/dataframe/dataframe.jl | 91 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+) diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md index fb626bc019..d90e5c89bc 100644 --- a/docs/src/lib/functions.md +++ b/docs/src/lib/functions.md @@ -44,6 +44,7 @@ Pages = ["functions.md"] ## Constructing data frames ```@docs copy +expandgrid similar ``` diff --git a/src/DataFrames.jl b/src/DataFrames.jl index 4911212922..305dec9b0e 100644 --- a/src/DataFrames.jl +++ b/src/DataFrames.jl @@ -49,6 +49,7 @@ export AbstractDataFrame, disallowmissing!, dropmissing!, dropmissing, + expandgrid, fillcombinations, flatten, groupby, diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 12cd9bdced..8b79df29e4 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -1806,3 +1806,94 @@ function _replace_columns!(df::DataFrame, newdf::DataFrame) copy!(index(df).lookup, index(newdf).lookup) return df end + +expandgrid(; kwargs...) = isempty(kwargs) ? DataFrame() : expandgrid(kwargs...) + +expandgrid(pairs::Pair{<:AbstractString, <:Any}...) = + expandgrid((Symbol(k) => v for (k,v) in pairs)...) + +""" + expandgrid(pairs::Pair...) + expandgrid(kwargs...) + +Create a `DataFrame`` from all combinations of the passed arguments. + +It is allowed to pass a list of `Pair`s as positional +arguments, or a list of keyword arguments. Each `Pair`` is considered +to represent a column name to column values to expand mapping. +Column name must be a `Symbol` or string. All passed column names must be unique. + +Column value can be a vector which is consumed as is or an object of any other +type (except `AbstractArray`). In the latter case the passed value is treated +as having length one for expansion. As a particular rule values stored in a `Ref` +or a `0`-dimensional `AbstractArray` are unwrapped and treated as having length one. + +`DataFrame` can store only columns that use 1-based indexing. Attempting +to store a vector using non-standard indexing after `repeat` is called on it +will raise an error. + +# Examples + +```jldoctest +julia> expandgrid(a=1:2, b='a':'c') +6×2 DataFrame + Row │ a b + │ Int64 Char +─────┼───────────── + 1 │ 1 a + 2 │ 2 a + 3 │ 1 b + 4 │ 2 b + 5 │ 1 c + 6 │ 2 c + +julia> expandgrid("a" => 1:2, "b" => 'a':'c', "c" => "const") +6×3 DataFrame + Row │ a b c + │ Int64 Char String +─────┼───────────────────── + 1 │ 1 a const + 2 │ 2 a const + 3 │ 1 b const + 4 │ 2 b const + 5 │ 1 c const + 6 │ 2 c const +``` +""" +function expandgrid(pairs::Pair{Symbol, <:Any}...) + colnames = first.(pairs) + if !allunique(colnames) + throw(ArgumentError("All column names passed to expandgrid must be unique")) + end + colvalues = map(pairs) do p + v = last(p) + if v isa AbstractVector + return v + elseif v isa Union{AbstractArray{<:Any, 0}, Ref} + x = v[] + return fill!(Tables.allocatecolumn(typeof(x), 1), x) + elseif v isa AbstractArray + throw(ArgumentError("adding AbstractArray other than AbstractVector " * + "as a column of a data frame is not allowed")) + else + return fill!(Tables.allocatecolumn(typeof(v), 1), v) + end + end + @assert length(colvalues) == length(colnames) + @assert all(x -> x isa AbstractVector, colvalues) + + target_rows = Int(prod(x -> big(length(x)), colvalues)) + out_df = DataFrame() + inner = 1 + for (val, cname) in zip(colvalues, colnames) + len = length(val) + last_inner = inner + inner *= len + outer, remv = inner == 0 ? (0, 0) : divrem(target_rows, inner) + @assert iszero(remv) + out_df[!, cname] = repeat(val, inner=last_inner, outer=outer) + end + @assert inner == target_rows + @assert size(out_df) == (target_rows, length(colnames)) + return out_df +end From fbb51b3d9d7dd4d1b9335f597e51ebe3bedd3594 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 29 Mar 2022 22:50:55 +0200 Subject: [PATCH 02/10] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- src/dataframe/dataframe.jl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 8b79df29e4..b533a2a930 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -1814,14 +1814,13 @@ expandgrid(pairs::Pair{<:AbstractString, <:Any}...) = """ expandgrid(pairs::Pair...) - expandgrid(kwargs...) + expandgrid(; kwargs...) -Create a `DataFrame`` from all combinations of the passed arguments. +Create a `DataFrame` from all combinations of values in passed arguments. -It is allowed to pass a list of `Pair`s as positional -arguments, or a list of keyword arguments. Each `Pair`` is considered -to represent a column name to column values to expand mapping. -Column name must be a `Symbol` or string. All passed column names must be unique. +Arguments associating a column name with values to expand can be specified +either as `Pair`s passed as positional arguments, or as keyword arguments. +Column names must be `Symbol`s or strings and must be unique. Column value can be a vector which is consumed as is or an object of any other type (except `AbstractArray`). In the latter case the passed value is treated From ec185df4fddc7d9e7ddc9edf50cc2899094dc289 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 2 Apr 2022 11:49:42 +0200 Subject: [PATCH 03/10] remove unnecessary comment --- src/dataframe/dataframe.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 8b79df29e4..6e5362485a 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -1828,10 +1828,6 @@ type (except `AbstractArray`). In the latter case the passed value is treated as having length one for expansion. As a particular rule values stored in a `Ref` or a `0`-dimensional `AbstractArray` are unwrapped and treated as having length one. -`DataFrame` can store only columns that use 1-based indexing. Attempting -to store a vector using non-standard indexing after `repeat` is called on it -will raise an error. - # Examples ```jldoctest From 24395b7b0953868f82d6bb8f55a39d1d62611ef9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 2 Apr 2022 18:47:48 +0200 Subject: [PATCH 04/10] add NEWS.md and tests --- NEWS.md | 5 ++- docs/src/lib/functions.md | 2 +- src/DataFrames.jl | 2 +- src/dataframe/dataframe.jl | 18 +++++----- test/data.jl | 68 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 83 insertions(+), 12 deletions(-) diff --git a/NEWS.md b/NEWS.md index ebc24cccc8..b9cdd1ca74 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,13 +22,16 @@ * Add `fillcombinations` function that generates all combinations of levels of selected columns of a data frame ([#3012](https://github.com/JuliaData/DataFrames.jl/issues/3012)) +* Add `allcombinations` function that returns a data frame created + from all combinations of the passed vectors + ([#3031](https://github.com/JuliaData/DataFrames.jl/pull/3031)) ## Previously announced breaking changes * On Julia 1.7 or newer broadcasting assignment into an existing column of a data frame replaces it. Under Julia 1.6 or older it is an in place operation. - ([#3022](https://github.com/JuliaData/DataFrames.jl/pull/3022) + ([#3022](https://github.com/JuliaData/DataFrames.jl/pull/3022)) # DataFrames.jl v1.3.2 Patch Release Notes diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md index d90e5c89bc..f83df96271 100644 --- a/docs/src/lib/functions.md +++ b/docs/src/lib/functions.md @@ -43,8 +43,8 @@ Pages = ["functions.md"] ## Constructing data frames ```@docs +allcombinations copy -expandgrid similar ``` diff --git a/src/DataFrames.jl b/src/DataFrames.jl index 305dec9b0e..4e955b04f5 100644 --- a/src/DataFrames.jl +++ b/src/DataFrames.jl @@ -39,6 +39,7 @@ export AbstractDataFrame, GroupedDataFrame, SubDataFrame, Tables, + allcombinations, allowmissing!, antijoin, columnindex, @@ -49,7 +50,6 @@ export AbstractDataFrame, disallowmissing!, dropmissing!, dropmissing, - expandgrid, fillcombinations, flatten, groupby, diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 716dc5be06..86889441fc 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -1807,14 +1807,14 @@ function _replace_columns!(df::DataFrame, newdf::DataFrame) return df end -expandgrid(; kwargs...) = isempty(kwargs) ? DataFrame() : expandgrid(kwargs...) +allcombinations(; kwargs...) = isempty(kwargs) ? DataFrame() : allcombinations(kwargs...) -expandgrid(pairs::Pair{<:AbstractString, <:Any}...) = - expandgrid((Symbol(k) => v for (k,v) in pairs)...) +allcombinations(pairs::Pair{<:AbstractString, <:Any}...) = + allcombinations((Symbol(k) => v for (k, v) in pairs)...) """ - expandgrid(pairs::Pair...) - expandgrid(; kwargs...) + allcombinations(pairs::Pair...) + allcombinations(; kwargs...) Create a `DataFrame` from all combinations of values in passed arguments. @@ -1830,7 +1830,7 @@ or a `0`-dimensional `AbstractArray` are unwrapped and treated as having length # Examples ```jldoctest -julia> expandgrid(a=1:2, b='a':'c') +julia> allcombinations(a=1:2, b='a':'c') 6×2 DataFrame Row │ a b │ Int64 Char @@ -1842,7 +1842,7 @@ julia> expandgrid(a=1:2, b='a':'c') 5 │ 1 c 6 │ 2 c -julia> expandgrid("a" => 1:2, "b" => 'a':'c', "c" => "const") +julia> allcombinations("a" => 1:2, "b" => 'a':'c', "c" => "const") 6×3 DataFrame Row │ a b c │ Int64 Char String @@ -1855,10 +1855,10 @@ julia> expandgrid("a" => 1:2, "b" => 'a':'c', "c" => "const") 6 │ 2 c const ``` """ -function expandgrid(pairs::Pair{Symbol, <:Any}...) +function allcombinations(pairs::Pair{Symbol, <:Any}...) colnames = first.(pairs) if !allunique(colnames) - throw(ArgumentError("All column names passed to expandgrid must be unique")) + throw(ArgumentError("All column names passed to allcombinations must be unique")) end colvalues = map(pairs) do p v = last(p) diff --git a/test/data.jl b/test/data.jl index f17db1f83f..d4ae992d5f 100644 --- a/test/data.jl +++ b/test/data.jl @@ -615,4 +615,72 @@ end @test_throws ArgumentError fillcombinations(df, 2) end +@testset "allcombinations" begin + @test allcombinations() == DataFrame() + @test allcombinations(a=1:2, b=3:4) == + allcombinations("a" => 1:2, "b" => 3:4) == + allcombinations(:a => 1:2, :b => 3:4) == + DataFrame(a=[1, 2, 1, 2], b=[3, 3, 4, 4]) + @test_throws MethodError allcombinations("a" => 1:2, :b => 3:4) + @test_throws ArgumentError allcombinations("a" => 1:2, "a" => 3:4) + + res = allcombinations(a=categorical(["a", "b", "a"], levels=["c", "b", "a"])) + @test res == DataFrame(a=["a", "b", "a"]) + @test res.a isa CategoricalVector + @test levels(res.a) == ["c", "b", "a"] + + @test allcombinations(a=categorical(["a", "b", "a"]), + b=Ref([1, 2]), + c=fill(1:2), + d=DataFrame(p=1, q=2)) == + DataFrame(a=categorical(["a", "b", "a"]), + b=Ref([1, 2]), + c=fill(1:2), + d=DataFrame(p=1, q=2)) + @test allcombinations(a=categorical(["a", "b", "a"]), + b=Ref([1, 2]), + c=fill(1:2), + d=DataFrame(p=1, q=2), + e=1:2) == + DataFrame(a=categorical(["a", "b", "a", "a", "b", "a"]), + b=Ref([1, 2]), + c=fill(1:2), + d=DataFrame(p=1, q=2), + e=[1, 1, 1, 2, 2, 2]) + @test_throws ArgumentError allcombinations(a=[1 2; 3 4]) + + @test allcombinations(a=[1, 1, 1], b=[2, 2, 2]) == + DataFrame(a=fill(1, 9), b=fill(2, 9)) + @test allcombinations(a=[1, 1, 1], b='a':'b', c=[2, 2, 2]) == + DataFrame(a=fill(1, 18), b=repeat('a':'b', inner=3, outer=3), c=fill(2, 18)) + + res = allcombinations(b=categorical(String[], levels=["a"])) + @test nrow(res) == 0 + @test names(res) == ["b"] + @test typeof(res.b) <: CategoricalVector{String} + @test levels(res.b) == ["a"] + + res = allcombinations(b=categorical(String[], levels=["a"]), c='a':'b') + @test nrow(res) == 0 + @test names(res) == ["b", "c"] + @test typeof(res.b) <: CategoricalVector{String} + @test levels(res.b) == ["a"] + @test typeof(res.c) === Vector{Char} + + res = allcombinations(a=1:3, b=categorical(String[], levels=["a"])) + @test nrow(res) == 0 + @test names(res) == ["a", "b"] + @test typeof(res.a) === Vector{Int} + @test typeof(res.b) <: CategoricalVector{String} + @test levels(res.b) == ["a"] + + res = allcombinations(a=1:3, b=categorical(String[], levels=["a"]), c='a':'b') + @test nrow(res) == 0 + @test names(res) == ["a", "b", "c"] + @test typeof(res.a) === Vector{Int} + @test typeof(res.b) <: CategoricalVector{String} + @test levels(res.b) == ["a"] + @test typeof(res.c) === Vector{Char} +end + end # module From 7adc98285e37ff849457f3e5bd4ede9dda44bdbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 23 Apr 2022 21:55:25 +0200 Subject: [PATCH 05/10] require DataFrame as a prefix --- src/dataframe/dataframe.jl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 266be9a2d1..9b367cf4f1 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -1573,14 +1573,15 @@ function _replace_columns!(df::DataFrame, newdf::DataFrame) return df end -allcombinations(; kwargs...) = isempty(kwargs) ? DataFrame() : allcombinations(kwargs...) +allcombinations(::Type{DataFrame}; kwargs...) = + isempty(kwargs) ? DataFrame() : allcombinations(DataFrame, kwargs...) -allcombinations(pairs::Pair{<:AbstractString, <:Any}...) = +allcombinations(::Type{DataFrame}, pairs::Pair{<:AbstractString, <:Any}...) = allcombinations((Symbol(k) => v for (k, v) in pairs)...) """ - allcombinations(pairs::Pair...) - allcombinations(; kwargs...) + allcombinations(DataFrame, pairs::Pair...) + allcombinations(DataFrame, ; kwargs...) Create a `DataFrame` from all combinations of values in passed arguments. @@ -1596,7 +1597,7 @@ or a `0`-dimensional `AbstractArray` are unwrapped and treated as having length # Examples ```jldoctest -julia> allcombinations(a=1:2, b='a':'c') +julia> allcombinations(DataFrame, a=1:2, b='a':'c') 6×2 DataFrame Row │ a b │ Int64 Char @@ -1608,7 +1609,7 @@ julia> allcombinations(a=1:2, b='a':'c') 5 │ 1 c 6 │ 2 c -julia> allcombinations("a" => 1:2, "b" => 'a':'c', "c" => "const") +julia> allcombinations(DataFrame, "a" => 1:2, "b" => 'a':'c', "c" => "const") 6×3 DataFrame Row │ a b c │ Int64 Char String @@ -1621,7 +1622,7 @@ julia> allcombinations("a" => 1:2, "b" => 'a':'c', "c" => "const") 6 │ 2 c const ``` """ -function allcombinations(pairs::Pair{Symbol, <:Any}...) +function allcombinations(::Type{DataFrame}, pairs::Pair{Symbol, <:Any}...) colnames = first.(pairs) if !allunique(colnames) throw(ArgumentError("All column names passed to allcombinations must be unique")) From cd037aa2e0359fd1177f45a64ecb1fa1378db9d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 23 Apr 2022 23:51:11 +0200 Subject: [PATCH 06/10] update tests --- test/data.jl | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/test/data.jl b/test/data.jl index d4ae992d5f..2a1b7fe478 100644 --- a/test/data.jl +++ b/test/data.jl @@ -616,20 +616,20 @@ end end @testset "allcombinations" begin - @test allcombinations() == DataFrame() - @test allcombinations(a=1:2, b=3:4) == - allcombinations("a" => 1:2, "b" => 3:4) == - allcombinations(:a => 1:2, :b => 3:4) == + @test allcombinations(DataFrame) == DataFrame() + @test allcombinations(DataFrame, a=1:2, b=3:4) == + allcombinations(DataFrame, "a" => 1:2, "b" => 3:4) == + allcombinations(DataFrame, :a => 1:2, :b => 3:4) == DataFrame(a=[1, 2, 1, 2], b=[3, 3, 4, 4]) - @test_throws MethodError allcombinations("a" => 1:2, :b => 3:4) - @test_throws ArgumentError allcombinations("a" => 1:2, "a" => 3:4) + @test_throws MethodError allcombinations(DataFrame, "a" => 1:2, :b => 3:4) + @test_throws ArgumentError allcombinations(DataFrame, "a" => 1:2, "a" => 3:4) - res = allcombinations(a=categorical(["a", "b", "a"], levels=["c", "b", "a"])) + res = allcombinations(DataFrame, a=categorical(["a", "b", "a"], levels=["c", "b", "a"])) @test res == DataFrame(a=["a", "b", "a"]) @test res.a isa CategoricalVector @test levels(res.a) == ["c", "b", "a"] - @test allcombinations(a=categorical(["a", "b", "a"]), + @test allcombinations(DataFrame, a=categorical(["a", "b", "a"]), b=Ref([1, 2]), c=fill(1:2), d=DataFrame(p=1, q=2)) == @@ -637,7 +637,7 @@ end b=Ref([1, 2]), c=fill(1:2), d=DataFrame(p=1, q=2)) - @test allcombinations(a=categorical(["a", "b", "a"]), + @test allcombinations(DataFrame, a=categorical(["a", "b", "a"]), b=Ref([1, 2]), c=fill(1:2), d=DataFrame(p=1, q=2), @@ -647,34 +647,34 @@ end c=fill(1:2), d=DataFrame(p=1, q=2), e=[1, 1, 1, 2, 2, 2]) - @test_throws ArgumentError allcombinations(a=[1 2; 3 4]) + @test_throws ArgumentError allcombinations(DataFrame, a=[1 2; 3 4]) - @test allcombinations(a=[1, 1, 1], b=[2, 2, 2]) == + @test allcombinations(DataFrame, a=[1, 1, 1], b=[2, 2, 2]) == DataFrame(a=fill(1, 9), b=fill(2, 9)) - @test allcombinations(a=[1, 1, 1], b='a':'b', c=[2, 2, 2]) == + @test allcombinations(DataFrame, a=[1, 1, 1], b='a':'b', c=[2, 2, 2]) == DataFrame(a=fill(1, 18), b=repeat('a':'b', inner=3, outer=3), c=fill(2, 18)) - res = allcombinations(b=categorical(String[], levels=["a"])) + res = allcombinations(DataFrame, b=categorical(String[], levels=["a"])) @test nrow(res) == 0 @test names(res) == ["b"] @test typeof(res.b) <: CategoricalVector{String} @test levels(res.b) == ["a"] - res = allcombinations(b=categorical(String[], levels=["a"]), c='a':'b') + res = allcombinations(DataFrame, b=categorical(String[], levels=["a"]), c='a':'b') @test nrow(res) == 0 @test names(res) == ["b", "c"] @test typeof(res.b) <: CategoricalVector{String} @test levels(res.b) == ["a"] @test typeof(res.c) === Vector{Char} - res = allcombinations(a=1:3, b=categorical(String[], levels=["a"])) + res = allcombinations(DataFrame, a=1:3, b=categorical(String[], levels=["a"])) @test nrow(res) == 0 @test names(res) == ["a", "b"] @test typeof(res.a) === Vector{Int} @test typeof(res.b) <: CategoricalVector{String} @test levels(res.b) == ["a"] - res = allcombinations(a=1:3, b=categorical(String[], levels=["a"]), c='a':'b') + res = allcombinations(DataFrame, a=1:3, b=categorical(String[], levels=["a"]), c='a':'b') @test nrow(res) == 0 @test names(res) == ["a", "b", "c"] @test typeof(res.a) === Vector{Int} From a51396b85b9bd5cb7f613496eb891760da2a50c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 24 Apr 2022 08:57:14 +0200 Subject: [PATCH 07/10] fix one method --- src/dataframe/dataframe.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 9b367cf4f1..a655ea25e9 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -1577,7 +1577,7 @@ allcombinations(::Type{DataFrame}; kwargs...) = isempty(kwargs) ? DataFrame() : allcombinations(DataFrame, kwargs...) allcombinations(::Type{DataFrame}, pairs::Pair{<:AbstractString, <:Any}...) = - allcombinations((Symbol(k) => v for (k, v) in pairs)...) + allcombinations(DataFrame, (Symbol(k) => v for (k, v) in pairs)...) """ allcombinations(DataFrame, pairs::Pair...) From b67467b4facc6d3a2fffa7e95ea977175ec5347c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 24 Apr 2022 09:04:06 +0200 Subject: [PATCH 08/10] up DataAPI.jl version requirement --- Project.toml | 2 +- src/DataFrames.jl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 5b8e3e5dce..ebc02f7ce2 100644 --- a/Project.toml +++ b/Project.toml @@ -26,7 +26,7 @@ Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [compat] CategoricalArrays = "0.10.0" Compat = "3.17" -DataAPI = "1.9" +DataAPI = "1.10" InvertedIndices = "1" IteratorInterfaceExtensions = "0.1.1, 1" Missings = "0.4.2, 1" diff --git a/src/DataFrames.jl b/src/DataFrames.jl index f3a46ce05b..63b6b77a01 100644 --- a/src/DataFrames.jl +++ b/src/DataFrames.jl @@ -11,6 +11,7 @@ using PrettyTables using Random import DataAPI, + DataAPI.allcombinations, DataAPI.All, DataAPI.Between, DataAPI.Cols, From a42aca2971676ec63149986290e7c2b48a719854 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 24 Apr 2022 09:10:42 +0200 Subject: [PATCH 09/10] use skipmissing=false in levels --- src/abstractdataframe/abstractdataframe.jl | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index a753959b7c..cbfeec278b 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -1519,14 +1519,8 @@ function fillcombinations(df::AbstractDataFrame, indexcols; # Create a vector of vectors of unique values in each column uniquevals = [] for col in colind - # levels drops missing, handle the case where missing values are present # All levels are retained, missing is added only if present - # TODO: change this after DataAPI.jl levels supports missing - if any(ismissing, df[!, col]) - tempcol = vcat(levels(df[!, col]), missing) - else - tempcol = levels(df[!, col]) - end + tempcol = levels(df[!, col], skipmissing=false) push!(uniquevals, tempcol) end @@ -2528,12 +2522,12 @@ function _permutation_helper!(fun::Union{typeof(Base.permute!!), typeof(Base.inv nrow(df) != length(p) && throw(DimensionMismatch("Permutation does not have a correct length " * "(expected $(nrow(df)) but got $(length(p)))")) - + cp = _compile_permutation!(Base.copymutable(p)) isempty(cp) && return df - if fun === Base.invpermute!! + if fun === Base.invpermute!! reverse!(@view cp[1:end-1]) end @@ -2544,14 +2538,14 @@ function _permutation_helper!(fun::Union{typeof(Base.permute!!), typeof(Base.inv _cycle_permute!(col, cp) end end - + return df end -# convert a classical permutation to zero terminated cycle +# convert a classical permutation to zero terminated cycle # notation, zeroing the original permutation in the process. function _compile_permutation!(p::AbstractVector{<:Integer}) - firstindex(p) == 1 || + firstindex(p) == 1 || throw(ArgumentError("Permutation vectors must have 1-based indexing")) # this length is sufficient because we do not record 1-cycles, # so the worst case is all 2-cycles. One extra element gives the From f542f675d39f9a69f754c39da771cfb0dd0aa7c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 25 Apr 2022 10:41:55 +0200 Subject: [PATCH 10/10] Update src/dataframe/dataframe.jl Co-authored-by: Milan Bouchet-Valat --- src/dataframe/dataframe.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index a655ea25e9..cf5917a29b 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -1581,7 +1581,7 @@ allcombinations(::Type{DataFrame}, pairs::Pair{<:AbstractString, <:Any}...) = """ allcombinations(DataFrame, pairs::Pair...) - allcombinations(DataFrame, ; kwargs...) + allcombinations(DataFrame; kwargs...) Create a `DataFrame` from all combinations of values in passed arguments.