From 9dcea7b358f5885f1cc03652868640685bac7b8c Mon Sep 17 00:00:00 2001 From: piever Date: Mon, 31 Jan 2022 15:31:54 +0100 Subject: [PATCH 1/8] fix width in histogram bins --- src/scales.jl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/scales.jl b/src/scales.jl index b2ff843e9..62fc1a479 100644 --- a/src/scales.jl +++ b/src/scales.jl @@ -92,11 +92,9 @@ function datetimeticks(f, datetimes::AbstractVector{<:TimeType}) end # Rescaling methods that do not depend on context -elementwise_rescale(value::TimeType) = datetime2float(value) -elementwise_rescale(value::Verbatim) = value[] -elementwise_rescale(value) = value - -contextfree_rescale(values) = map(elementwise_rescale, values) +contextfree_rescale(values::AbstractArray{<:TimeType}) = map(datetime2float, values) +contextfree_rescale(values::AbstractArray{<:Verbatim}) = map(getindex, values) +contextfree_rescale(values::AbstractArray) = values rescale(values, ::Nothing) = values From 96afa32df6b40b83b585773972d4aca4a97eb457 Mon Sep 17 00:00:00 2001 From: piever Date: Mon, 31 Jan 2022 15:32:11 +0100 Subject: [PATCH 2/8] bump version number --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 0af2b945a..d3b3b4369 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "AlgebraOfGraphics" uuid = "cbdf2221-f076-402e-a563-3d30da359d67" authors = ["Pietro Vertechi "] -version = "0.6.1" +version = "0.6.2" [deps] Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" From cb703788343423d74c40ab209d34ac8a3e2fbd38 Mon Sep 17 00:00:00 2001 From: piever Date: Mon, 31 Jan 2022 15:54:46 +0100 Subject: [PATCH 3/8] alternative fix to hist bin width --- src/algebra/layers.jl | 9 +++------ src/scales.jl | 8 +++++--- src/transformations/histogram.jl | 4 +++- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/algebra/layers.jl b/src/algebra/layers.jl index b90c0afa9..f25ecbbe0 100644 --- a/src/algebra/layers.jl +++ b/src/algebra/layers.jl @@ -56,12 +56,9 @@ function compute_attributes(pl::ProcessedLayer) merge!(attrs, Dictionary(valid_options(; color, cycle))) - # avoid automatic bar width computation in Makie - if (plottype <: BarPlot) && !haskey(attrs, :width) - x = first(positional) - width = (x isa AbstractRange) && (length(positional) == 2) ? step(x) : 1.0 - insert!(attrs, :width, width) - end + # avoid automatic bar width computation in Makie (issue #277) + # TODO: consider only implementing this when `x` is categorical + (plottype <: BarPlot) && !haskey(attrs, :width) && insert!(attrs, :width, 1) # remove unnecessary information return filterkeys(!in((:col, :row, :layout, :alpha)), attrs) diff --git a/src/scales.jl b/src/scales.jl index 62fc1a479..b2ff843e9 100644 --- a/src/scales.jl +++ b/src/scales.jl @@ -92,9 +92,11 @@ function datetimeticks(f, datetimes::AbstractVector{<:TimeType}) end # Rescaling methods that do not depend on context -contextfree_rescale(values::AbstractArray{<:TimeType}) = map(datetime2float, values) -contextfree_rescale(values::AbstractArray{<:Verbatim}) = map(getindex, values) -contextfree_rescale(values::AbstractArray) = values +elementwise_rescale(value::TimeType) = datetime2float(value) +elementwise_rescale(value::Verbatim) = value[] +elementwise_rescale(value) = value + +contextfree_rescale(values) = map(elementwise_rescale, values) rescale(values, ::Nothing) = values diff --git a/src/transformations/histogram.jl b/src/transformations/histogram.jl index fcdbdd967..727b93cc0 100644 --- a/src/transformations/histogram.jl +++ b/src/transformations/histogram.jl @@ -41,7 +41,9 @@ function (h::HistogramAnalysis)(input::ProcessedLayer) output = map(input) do p, n hist = _histogram(Tuple(p); pairs(n)..., pairs(options)...) - return (map(midpoints, hist.edges)..., hist.weights), (;) + edges, weights = hist.edges, hist.weights + named = length(edges) === 1 ? (width=step(first(edges)),) : (;) + return (map(midpoints, edges)..., weights), named end N = length(input.positional) From 195a60d655f6d373260630e9ba3a7f861576dd6a Mon Sep 17 00:00:00 2001 From: piever Date: Mon, 31 Jan 2022 16:10:28 +0100 Subject: [PATCH 4/8] fix stacked hist --- src/algebra/layer.jl | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/algebra/layer.jl b/src/algebra/layer.jl index ceecfc1b6..563e7a807 100644 --- a/src/algebra/layer.jl +++ b/src/algebra/layer.jl @@ -183,14 +183,32 @@ function mergeable(processedlayer::ProcessedLayer) return false end +function extend_scalars(v, n) + w = Broadcast.broadcastable(v) + nd = ndims(w) + if nd == 0 + # TODO: consider lazy container + return fill(only(w), n) + elseif nd == 1 + assert_equal(length(w), n) + return w + else + throw(ArgumentError("expecting scalar or vector, found array with $nd indices")) + end +end + # This method works on a list of "sliced" `ProcessedLayer`s function concatenate(pls::AbstractVector{ProcessedLayer}) pl = first(pls) - ns = [mapreduce(length, assert_equal, Iterators.flatten([pl.positional, pl.named])) for pl in pls] + ns = [mapreduce(length, assert_equal, pl.positional) for pl in pls] - primary = map(key -> reduce(vcat, [fill(pl.primary[key], n) for (pl, n) in zip(pls, ns)]), keys(pl.primary)) + primary = map(keys(pl.primary)) do key + return reduce(vcat, [extend_scalars(pl.primary[key], n) for (pl, n) in zip(pls, ns)]) + end + named = map(keys(pl.named)) do key + return reduce(vcat, [extend_scalars(pl.named[key], n) for (pl, n) in zip(pls, ns)]) + end positional = map(key -> reduce(vcat, [pl.positional[key] for pl in pls]), keys(pl.positional)) - named = map(key -> reduce(vcat, [pl.named[key] for pl in pls]), keys(pl.named)) return ProcessedLayer(pl; primary, positional, named) end From 33c9ca9a0ea7be22c80b1eee42c45595557bf4a8 Mon Sep 17 00:00:00 2001 From: piever Date: Mon, 31 Jan 2022 16:25:03 +0100 Subject: [PATCH 5/8] cleaner fix --- src/algebra/layer.jl | 24 +++--------------------- src/transformations/histogram.jl | 2 +- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/src/algebra/layer.jl b/src/algebra/layer.jl index 563e7a807..ceecfc1b6 100644 --- a/src/algebra/layer.jl +++ b/src/algebra/layer.jl @@ -183,32 +183,14 @@ function mergeable(processedlayer::ProcessedLayer) return false end -function extend_scalars(v, n) - w = Broadcast.broadcastable(v) - nd = ndims(w) - if nd == 0 - # TODO: consider lazy container - return fill(only(w), n) - elseif nd == 1 - assert_equal(length(w), n) - return w - else - throw(ArgumentError("expecting scalar or vector, found array with $nd indices")) - end -end - # This method works on a list of "sliced" `ProcessedLayer`s function concatenate(pls::AbstractVector{ProcessedLayer}) pl = first(pls) - ns = [mapreduce(length, assert_equal, pl.positional) for pl in pls] + ns = [mapreduce(length, assert_equal, Iterators.flatten([pl.positional, pl.named])) for pl in pls] - primary = map(keys(pl.primary)) do key - return reduce(vcat, [extend_scalars(pl.primary[key], n) for (pl, n) in zip(pls, ns)]) - end - named = map(keys(pl.named)) do key - return reduce(vcat, [extend_scalars(pl.named[key], n) for (pl, n) in zip(pls, ns)]) - end + primary = map(key -> reduce(vcat, [fill(pl.primary[key], n) for (pl, n) in zip(pls, ns)]), keys(pl.primary)) positional = map(key -> reduce(vcat, [pl.positional[key] for pl in pls]), keys(pl.positional)) + named = map(key -> reduce(vcat, [pl.named[key] for pl in pls]), keys(pl.named)) return ProcessedLayer(pl; primary, positional, named) end diff --git a/src/transformations/histogram.jl b/src/transformations/histogram.jl index 727b93cc0..621bd4839 100644 --- a/src/transformations/histogram.jl +++ b/src/transformations/histogram.jl @@ -42,7 +42,7 @@ function (h::HistogramAnalysis)(input::ProcessedLayer) output = map(input) do p, n hist = _histogram(Tuple(p); pairs(n)..., pairs(options)...) edges, weights = hist.edges, hist.weights - named = length(edges) === 1 ? (width=step(first(edges)),) : (;) + named = length(edges) == 1 ? (; width=diff(first(edges))) : (;) return (map(midpoints, edges)..., weights), named end From a33942dad25ffca3762655871290eaa88ddc9cce Mon Sep 17 00:00:00 2001 From: piever Date: Mon, 31 Jan 2022 16:47:25 +0100 Subject: [PATCH 6/8] test new functionality --- src/transformations/histogram.jl | 10 ++++++++-- test/analyses.jl | 21 ++++++++++++++++++--- test/runtests.jl | 1 + 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/transformations/histogram.jl b/src/transformations/histogram.jl index 621bd4839..c69012c8f 100644 --- a/src/transformations/histogram.jl +++ b/src/transformations/histogram.jl @@ -3,13 +3,19 @@ const categoricalplottypes = [BarPlot, Heatmap, Volume] function compute_edges(intervals::Tuple, bins, closed) bs = bins isa Tuple ? bins : map(_ -> bins, intervals) return map(intervals, bs) do (min, max), b - b isa AbstractRange && return b + b isa AbstractVector && return b b isa Integer && return histrange(float(min), float(max), b, closed) - msg = "only AbstractRange and Integer or tuples thereof are accepted as bins" + msg = "only AbstractVector and Integer or tuples thereof are accepted as bins" throw(ArgumentError(msg)) end end +function midpoints(edges::AbstractVector) + i0, i1 = firstindex(edges), lastindex(edges) + front, tail = view(edges, i0:i1-1), view(edges, i0+1:i1) + return (front .+ tail) ./ 2 +end + function midpoints(edges::AbstractRange) min, s, l = minimum(edges), step(edges), length(edges) return range(min + s / 2, step=s, length=l - 1) diff --git a/test/analyses.jl b/test/analyses.jl index 804be868e..3d094f8bf 100644 --- a/test/analyses.jl +++ b/test/analyses.jl @@ -245,6 +245,15 @@ end @test labels == map(AlgebraOfGraphics.to_label, processedlayer.labels) end +@testset "midpoints" begin + edges = [1, 2, 10, 12] + @test midpoints(edges) ≈ [1.5, 6, 11] + + edges_rg = 1:2:5 + edges_v = [1, 3, 5] + @test midpoints(edges_v) ≈ midpoints(edges_rg) ≈ [2, 4] +end + @testset "histogram1D" begin df = (x=rand(1000), c=rand(["a", "b"], 1000)) bins = 0:0.01:1 @@ -279,16 +288,19 @@ end w2 = fit(Histogram, x2, bins2).weights rgx, w = processedlayer.positional + widths = processedlayer.named[:width] @test rgx[1] ≈ (bins1[1:end-1] .+ bins1[2:end]) ./ 2 @test w[1] ≈ w1 + @test widths[1] ≈ diff(bins1) @test rgx[2] ≈ (bins2[1:end-1] .+ bins2[2:end]) ./ 2 @test w[2] ≈ w2 + @test widths[2] ≈ diff(bins2) @test processedlayer.primary == NamedArguments((color=["a", "b"],)) - @test isempty(processedlayer.named) @test processedlayer.attributes == NamedArguments((gap=0, dodge_gap=0)) + @test keys(processedlayer.named) == Indices([:width]) @test processedlayer.plottype == AlgebraOfGraphics.BarPlot labels = MixedArguments() @@ -304,7 +316,7 @@ end @testset "weightedhistogram1d" begin df = (x=rand(1000), z=rand(1000), c=rand(["a", "b"], 1000)) - bins = 0:0.01:1 + bins = collect(0:0.01:1) # test vector of bins layer = data(df) * mapping(:x, color=:c, weights=:z) * histogram(; bins) processedlayer = AlgebraOfGraphics.ProcessedLayer(layer) @@ -318,15 +330,18 @@ end w2 = fit(Histogram, x2, weights(z2), bins).weights rgx, w = processedlayer.positional + widths = processedlayer.named[:width] @test rgx[1] ≈ (bins[1:end-1] .+ bins[2:end]) ./ 2 @test w[1] ≈ w1 + @test widths[1] ≈ diff(bins) @test rgx[2] ≈ (bins[1:end-1] .+ bins[2:end]) ./ 2 @test w[2] ≈ w2 + @test widths[2] ≈ diff(bins) @test processedlayer.primary == NamedArguments((color=["a", "b"],)) - @test isempty(processedlayer.named) + @test keys(processedlayer.named) == Indices([:width]) @test processedlayer.attributes == NamedArguments((gap=0, dodge_gap=0)) @test processedlayer.plottype == AlgebraOfGraphics.BarPlot diff --git a/test/runtests.jl b/test/runtests.jl index e6b423f1e..93f278595 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,6 +2,7 @@ using AlgebraOfGraphics, Makie, Random, Statistics, Test using AlgebraOfGraphics: Sorted using AlgebraOfGraphics: separate +using AlgebraOfGraphics: midpoints using AlgebraOfGraphics: Arguments, MixedArguments, NamedArguments using KernelDensity: kde, pdf From 44eeee0532d730c245f600ba5b166a8eb0170669 Mon Sep 17 00:00:00 2001 From: piever Date: Mon, 31 Jan 2022 16:54:18 +0100 Subject: [PATCH 7/8] fix tests --- src/transformations/histogram.jl | 6 +++--- test/runtests.jl | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/transformations/histogram.jl b/src/transformations/histogram.jl index c69012c8f..7508232f5 100644 --- a/src/transformations/histogram.jl +++ b/src/transformations/histogram.jl @@ -70,12 +70,12 @@ end Compute a histogram. -The attribute `bins` can be an `Integer`, an `AbstractRange`, or a `Tuple` -of either integers or ranges (useful for 2- or 3-dimensional histograms). +The attribute `bins` can be an `Integer`, an `AbstractVector` (in particular, a range), or +a `Tuple` of either integers or abstract vectors (useful for 2- or 3-dimensional histograms). When `bins` is an `Integer`, it denotes the approximate number of equal-width intervals used to compute the histogram. In that case, the range covered by the intervals is defined by `datalimits` (defaults to the extrema of the data). -When `bins` is an `AbstractRange`, it denotes the intervals directly. +When `bins` is an `AbstractVector`, it denotes the intervals directly. `closed` determines whether the the intervals are closed to the left or to the right. diff --git a/test/runtests.jl b/test/runtests.jl index 93f278595..a2684a42d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,6 +5,8 @@ using AlgebraOfGraphics: separate using AlgebraOfGraphics: midpoints using AlgebraOfGraphics: Arguments, MixedArguments, NamedArguments +using Dictionaries: Indices + using KernelDensity: kde, pdf using StatsBase: fit, histrange, Histogram, weights using GLM: GLM From af1972cd2ad35d6a0576deb6041046ffe5b37296 Mon Sep 17 00:00:00 2001 From: piever Date: Mon, 31 Jan 2022 17:28:25 +0100 Subject: [PATCH 8/8] extra tests --- test/analyses.jl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/test/analyses.jl b/test/analyses.jl index 3d094f8bf..dcd0200b4 100644 --- a/test/analyses.jl +++ b/test/analyses.jl @@ -268,12 +268,15 @@ end w2 = fit(Histogram, x2, bins).weights rgx, w = processedlayer.positional + width = processedlayer.named[:width] @test rgx[1] ≈ (bins[1:end-1] .+ bins[2:end]) ./ 2 @test w[1] == w1 + @test width[1] ≈ diff(bins) @test rgx[2] ≈ (bins[1:end-1] .+ bins[2:end]) ./ 2 @test w[2] == w2 + @test width[2] ≈ diff(bins) bins, closed = 12, :left layer = data(df) * mapping(:x, color=:c) * histogram(; bins, closed, datalimits=extrema) @@ -288,15 +291,15 @@ end w2 = fit(Histogram, x2, bins2).weights rgx, w = processedlayer.positional - widths = processedlayer.named[:width] + width = processedlayer.named[:width] @test rgx[1] ≈ (bins1[1:end-1] .+ bins1[2:end]) ./ 2 @test w[1] ≈ w1 - @test widths[1] ≈ diff(bins1) + @test width[1] ≈ diff(bins1) @test rgx[2] ≈ (bins2[1:end-1] .+ bins2[2:end]) ./ 2 @test w[2] ≈ w2 - @test widths[2] ≈ diff(bins2) + @test width[2] ≈ diff(bins2) @test processedlayer.primary == NamedArguments((color=["a", "b"],)) @test processedlayer.attributes == NamedArguments((gap=0, dodge_gap=0)) @@ -330,15 +333,15 @@ end w2 = fit(Histogram, x2, weights(z2), bins).weights rgx, w = processedlayer.positional - widths = processedlayer.named[:width] + width = processedlayer.named[:width] @test rgx[1] ≈ (bins[1:end-1] .+ bins[2:end]) ./ 2 @test w[1] ≈ w1 - @test widths[1] ≈ diff(bins) + @test width[1] ≈ diff(bins) @test rgx[2] ≈ (bins[1:end-1] .+ bins[2:end]) ./ 2 @test w[2] ≈ w2 - @test widths[2] ≈ diff(bins) + @test width[2] ≈ diff(bins) @test processedlayer.primary == NamedArguments((color=["a", "b"],)) @test keys(processedlayer.named) == Indices([:width])