diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index c8e87d13de..e18eb63d05 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -458,7 +458,7 @@ where each row represents a variable and each column a summary statistic. - A symbol from the list `:mean`, `:std`, `:min`, `:q25`, `:median`, `:q75`, `:max`, `:eltype`, `:nunique`, `:first`, `:last`, and `:nmissing`. The default statistics used are `:mean`, `:min`, `:median`, - `:max`, `:nunique`, `:nmissing`, and `:eltype`. + `:max`, `:nmissing`, and `:eltype`. - `:all` as the only `Symbol` argument to return all statistics. - A `name => function` pair where `name` is a `Symbol` or string. This will create a column of summary statistics with the provided name. @@ -489,30 +489,16 @@ access missing values. # Examples ```julia -julia> df = DataFrame(i=1:10, x=0.1:0.1:1.0, y='a':'j') -10×3 DataFrame -│ Row │ i │ x │ y │ -│ │ Int64 │ Float64 │ Char │ -├─────┼───────┼─────────┼──────┤ -│ 1 │ 1 │ 0.1 │ 'a' │ -│ 2 │ 2 │ 0.2 │ 'b' │ -│ 3 │ 3 │ 0.3 │ 'c' │ -│ 4 │ 4 │ 0.4 │ 'd' │ -│ 5 │ 5 │ 0.5 │ 'e' │ -│ 6 │ 6 │ 0.6 │ 'f' │ -│ 7 │ 7 │ 0.7 │ 'g' │ -│ 8 │ 8 │ 0.8 │ 'h' │ -│ 9 │ 9 │ 0.9 │ 'i' │ -│ 10 │ 10 │ 1.0 │ 'j' │ +julia> df = DataFrame(i=1:10, x=0.1:0.1:1.0, y='a':'j'); julia> describe(df) -3×8 DataFrame -│ Row │ variable │ mean │ min │ median │ max │ nunique │ nmissing │ eltype │ -│ │ Symbol │ Union… │ Any │ Union… │ Any │ Union… │ Nothing │ DataType │ -├─────┼──────────┼────────┼─────┼────────┼─────┼─────────┼──────────┼──────────┤ -│ 1 │ i │ 5.5 │ 1 │ 5.5 │ 10 │ │ │ Int64 │ -│ 2 │ x │ 0.55 │ 0.1 │ 0.55 │ 1.0 │ │ │ Float64 │ -│ 3 │ y │ │ 'a' │ │ 'j' │ 10 │ │ Char │ +3×7 DataFrame +│ Row │ variable │ mean │ min │ median │ max │ nmissing │ eltype │ +│ │ Symbol │ Union… │ Any │ Union… │ Any │ Nothing │ DataType │ +├─────┼──────────┼────────┼─────┼────────┼─────┼──────────┼──────────┤ +│ 1 │ i │ 5.5 │ 1 │ 5.5 │ 10 │ │ Int64 │ +│ 2 │ x │ 0.55 │ 0.1 │ 0.55 │ 1.0 │ │ Float64 │ +│ 3 │ y │ │ 'a' │ │ 'j' │ │ Char │ julia> describe(df, :min, :max) 3×3 DataFrame @@ -547,7 +533,7 @@ DataAPI.describe(df::AbstractDataFrame, DataAPI.describe(df::AbstractDataFrame; cols=:) = _describe(select(df, cols, copycols=false), - [:mean, :min, :median, :max, :nunique, :nmissing, :eltype]) + [:mean, :min, :median, :max, :nmissing, :eltype]) function _describe(df::AbstractDataFrame, stats::AbstractVector) predefined_funs = Symbol[s for s in stats if s isa Symbol] @@ -655,7 +641,7 @@ function get_stats(col::AbstractVector, stats::AbstractVector{Symbol}) if eltype(col) <: Real d[:nunique] = nothing else - d[:nunique] = try length(unique(col)) catch end + d[:nunique] = try length(Set(col)) catch end end end diff --git a/test/dataframe.jl b/test/dataframe.jl index b2fc337bd1..83ab0da52f 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -641,7 +641,7 @@ end eltype = [Int, Union{Missing, Int}, String, Union{Missing, String}, Date, CategoricalValue{Int, UInt32}]) - default_fields = [:mean, :min, :median, :max, :nunique, :nmissing, :eltype] + default_fields = [:mean, :min, :median, :max, :nmissing, :eltype] # Test that it works as a whole, without keyword arguments @test describe_output[:, [:variable; default_fields]] == describe(df)