Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide formatter for labeling categories in cut function #202

Merged
merged 10 commits into from
Sep 20, 2019
28 changes: 20 additions & 8 deletions src/extras.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ function fill_refs!(refs::AbstractArray, X::AbstractArray{>: Missing},
end
end

"""
_default_formatter_
greimel marked this conversation as resolved.
Show resolved Hide resolved

Provide the default label format for the `cut` function.
"""
_default_formatter_(from, to, i; extend=false) = string("[", from, ", ", to, extend ? "]" : ")")

"""
cut(x::AbstractArray, breaks::AbstractVector;
extend::Bool=false, labels::AbstractVector=[], allow_missing::Bool=false)
greimel marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -56,14 +63,16 @@ also accept them.
* `extend::Bool=false`: when `false`, an error is raised if some values in `x` fall
outside of the breaks; when `true`, breaks are automatically added to include all
values in `x`, and the upper bound is included in the last interval.
* `labels::AbstractVector=[]`: a vector of strings giving the names to use for the
intervals; if empty, default labels are used.
* `labels::Union{AbstractVector,Function}=_default_formatter_`: a vector of strings giving the names to use for the
greimel marked this conversation as resolved.
Show resolved Hide resolved
intervals; or a function `f(from,to,i;extend=false)` that generates the labels from the left and right interval boundaries and the group index. Defaults to `string("[", from, ", ", to, extend ? "]" : ")")`, e.g. `"[1, 5)"`.
greimel marked this conversation as resolved.
Show resolved Hide resolved
* `allow_missing::Bool=true`: when `true`, values outside of breaks result in missing values.
only supported when `x` accepts missing values.
"""
function cut(x::AbstractArray{T, N}, breaks::AbstractVector;
extend::Bool=false, labels::AbstractVector{U}=String[],
extend::Bool=false, labels=_default_formatter_,
greimel marked this conversation as resolved.
Show resolved Hide resolved
allow_missing::Bool=false) where {T, N, U<:AbstractString}
greimel marked this conversation as resolved.
Show resolved Hide resolved
(labels isa AbstractVector) || (labels isa Function) || throw(ArgumentError("labels must be a formatter function or an AbstractVector"))
greimel marked this conversation as resolved.
Show resolved Hide resolved

greimel marked this conversation as resolved.
Show resolved Hide resolved
if !issorted(breaks)
breaks = sort(breaks)
end
Expand Down Expand Up @@ -92,7 +101,7 @@ function cut(x::AbstractArray{T, N}, breaks::AbstractVector;
end

n = length(breaks)
if isempty(labels)
if labels isa Function
@static if VERSION >= v"0.7.0-DEV.4524"
from = map(x -> sprint(show, x, context=:compact=>true), breaks[1:n-1])
to = map(x -> sprint(show, x, context=:compact=>true), breaks[2:n])
Expand All @@ -102,12 +111,12 @@ function cut(x::AbstractArray{T, N}, breaks::AbstractVector;
end
levs = Vector{String}(undef, n-1)
for i in 1:n-2
levs[i] = string("[", from[i], ", ", to[i], ")")
levs[i] = labels(from[i], to[i], i)
end
if extend
levs[end] = string("[", from[end], ", ", to[end], "]")
levs[end] = labels(from[end], to[end], n-1, extend=extend)
else
levs[end] = string("[", from[end], ", ", to[end], ")")
levs[end] = labels(from[end], to[end], n-1)
end
else
length(labels) == n-1 || throw(ArgumentError("labels must be of length $(n-1), but got length $(length(labels))"))
Expand All @@ -128,5 +137,8 @@ Cut a numeric array into `ngroups` quantiles, determined using
[`quantile`](@ref).
"""
cut(x::AbstractArray, ngroups::Integer;
labels::AbstractVector{U}=String[]) where {U<:AbstractString} =
labels=_default_formatter_) =
greimel marked this conversation as resolved.
Show resolved Hide resolved
cut(x, Statistics.quantile(x, (1:ngroups-1)/ngroups); extend=true, labels=labels)

greimel marked this conversation as resolved.
Show resolved Hide resolved


16 changes: 16 additions & 0 deletions test/15_extras.jl
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,20 @@ end
@test levels(x) == ["[2.0, 3.5)", "[3.5, 5.0]"]
end

@testset "formatter function" begin
greimel marked this conversation as resolved.
Show resolved Hide resolved
my_formatter1(from,to,i;extend=false) = "group $i"
greimel marked this conversation as resolved.
Show resolved Hide resolved
my_formatter2(from,to,i;extend=false) = "$i: $from -- $to"
function my_formatter3(from,to,i;extend=true)
percentile(x) = Int(round(100 * parse.(Float64,x),digits=0))
string("P",percentile(from),"P",percentile(to))
end

x = collect(0.15:0.20:0.95)
greimel marked this conversation as resolved.
Show resolved Hide resolved
p = [0, 0.4, 0.8, 1.0]

@test cut(x, p, labels=my_formatter1) == ["group 1", "group 1", "group 2", "group 2", "group 3"]
@test cut(x, p, labels=my_formatter2) == ["1: 0.0 -- 0.4", "1: 0.0 -- 0.4", "2: 0.4 -- 0.8", "2: 0.4 -- 0.8", "3: 0.8 -- 1.0"]
greimel marked this conversation as resolved.
Show resolved Hide resolved
@test cut(x, p, labels=my_formatter3) == ["P0P40" , "P0P40" , "P40P80" , "P40P80" , "P80P100"]
end

end