From da5ba15ba1706311144ab6e2f14f7357af91a941 Mon Sep 17 00:00:00 2001
From: Keno Fischer
Date: Sat, 28 Jul 2018 18:31:00 -0400
Subject: [PATCH] Implement sortslices, deprecate sortrows/sortcols

As discussed on triage, `sortslices` is the higher dimensional extension
of `sortrows`/`sortcols`. The dimensions being specified are the dimensions
(and for higher dimensions the order of the dimensions) to slice along.
See the help text for an example of the higher dimensional behavior.
Deprecate sortrows/sortcols in favor of sortslices.
---
 NEWS.md              |   2 +
 base/deprecated.jl   |   3 +
 base/exports.jl      |   3 +-
 base/sort.jl         | 164 ++++++++++++++++++++++++++++++++++---------
 doc/src/base/sort.md |   3 +-
 test/arrayops.jl     |  24 +++++--
 test/offsetarray.jl  |   4 +-
 7 files changed, 159 insertions(+), 44 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 3cde88143e8e2..50c7d4783ce97 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1325,6 +1325,8 @@ Deprecated or removed
 
   * `realmin`/`realmax` are deprecated in favor of `floatmin`/`floatmax` ([#28302]).
 
+  * `sortrows`/`sortcols` have been deprecated in favor of the more general `sortslices`.
+
 Command-line option changes
 ---------------------------
 
diff --git a/base/deprecated.jl b/base/deprecated.jl
index 853eb0bb5b71e..f926897185c62 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -1775,6 +1775,9 @@ end
 @deprecate realmin floatmin
 @deprecate realmax floatmax
 
+@deprecate sortrows(A::AbstractMatrix; kws...) sortslices(A; dims=:rows, kws...)
+@deprecate sortcols(A::AbstractMatrix; kws...) sortslices(A; dims=:cols, kws...)
+
 # END 0.7 deprecations
 
 # BEGIN 1.0 deprecations
diff --git a/base/exports.jl b/base/exports.jl
index 97553a8773334..b684eaf9eca49 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -419,10 +419,9 @@ export
     selectdim,
     sort!,
     sort,
-    sortcols,
     sortperm,
     sortperm!,
-    sortrows,
+    sortslices,
     dropdims,
     step,
     stride,
diff --git a/base/sort.jl b/base/sort.jl
index c655f5d355e2b..0d22f8e29a3d1 100644
--- a/base/sort.jl
+++ b/base/sort.jl
@@ -37,8 +37,7 @@ export # also exported by Base
     partialsort!,
     partialsortperm,
     partialsortperm!,
-    sortrows,
-    sortcols,
+    sortslices,
     # algorithms:
     InsertionSort,
     QuickSort,
@@ -933,73 +932,172 @@ end
     Av
 end
 
-
 """
-    sortrows(A; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
+    sortslices(A; dims, alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
 
-Sort the rows of matrix `A` lexicographically.
-See [`sort!`](@ref) for a description of possible
-keyword arguments.
+Sort slices of an array `A`. The required keyword argument `dims` must be either
+an integer or a tuple of integers. It specifies the dimension(s) over which the
+slices are sorted.
+
+If `A` is a matrix, `dims=1` will sort rows, `dims=2` will sort columns.
+Note that the default comparison function on one dimensional slices sorts
+lexicographically.
+
+For convenience, `dims=:rows` and `dims=:cols` are supported as alternative
+spellings for `dims=1` and `dims=2` respectively.
+
+For the remaining keyword arguments, see the documentation of [`sort!`](@ref).
 
 # Examples
 ```jldoctest
-julia> sortrows([7 3 5; -1 6 4; 9 -2 8])
+julia> sortslices([7 3 5; -1 6 4; 9 -2 8], dims=1) # Sort rows
+3×3 Array{Int64,2}:
+ -1   6  4
+  7   3  5
+  9  -2  8
+
+julia> sortslices([7 3 5; -1 6 4; 9 -2 8], dims=:rows)
 3×3 Array{Int64,2}:
  -1   6  4
   7   3  5
   9  -2  8
 
-julia> sortrows([7 3 5; -1 6 4; 9 -2 8], lt=(x,y)->isless(x[2],y[2]))
+julia> sortslices([7 3 5; -1 6 4; 9 -2 8], dims=1, lt=(x,y)->isless(x[2],y[2]))
 3×3 Array{Int64,2}:
   9  -2  8
   7   3  5
  -1   6  4
 
-julia> sortrows([7 3 5; -1 6 4; 9 -2 8], rev=true)
+julia> sortslices([7 3 5; -1 6 4; 9 -2 8], dims=1, rev=true)
 3×3 Array{Int64,2}:
   9  -2  8
   7   3  5
  -1   6  4
-```
-"""
-function sortrows(A::AbstractMatrix; kws...)
-    rows = [view(A, i, :) for i in axes(A,1)]
-    p = sortperm(rows; kws...)
-    A[p,:]
-end
 
-"""
-    sortcols(A; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
-
-Sort the columns of matrix `A` lexicographically.
-See [`sort!`](@ref) for a description of possible
-keyword arguments.
-
-# Examples
-```jldoctest
-julia> sortcols([7 3 5; 6 -1 -4; 9 -2 8])
+julia> sortslices([7 3 5; 6 -1 -4; 9 -2 8], dims=2) # Sort columns
 3×3 Array{Int64,2}:
   3   5  7
  -1  -4  6
  -2   8  9
 
-julia> sortcols([7 3 5; 6 -1 -4; 9 -2 8], alg=InsertionSort, lt=(x,y)->isless(x[2],y[2]))
+julia> sortslices([7 3 5; 6 -1 -4; 9 -2 8], dims=2, alg=InsertionSort, lt=(x,y)->isless(x[2],y[2]))
 3×3 Array{Int64,2}:
   5   3  7
  -4  -1  6
   8  -2  9
 
-julia> sortcols([7 3 5; 6 -1 -4; 9 -2 8], rev=true)
+julia> sortslices([7 3 5; 6 -1 -4; 9 -2 8], dims=2, rev=true)
 3×3 Array{Int64,2}:
  7   5   3
  6  -4  -1
  9   8  -2
 ```
+
+# Higher dimensions
+
+`sortslices` extends naturally to higher dimensions. E.g., if `A` is a
+2x2x2 array, `sortslices(A, dims=3)` will sort slices within the 3rd dimension,
+passing the 2x2 slices `A[:, :, 1]` and `A[:, :, 2]` to the comparison function. Note
+that while there is no default order on higher-dimensional slices, you may use the
+`by` or `lt` keyword argument to specify such an order.
+
+If `dims` is a tuple, the order of the dimensions in `dims` is
+relevant and specifies the linear order of the slices. E.g., if `A` is three
+dimensional and `dims` is `(1,2)`, the slices `A[i, j, :]` along the third dimension
+are sorted and then arranged back into the array in column-major order. If
+`dims` is `(2,1)` instead, the same slices will be taken, but the result order
+will be row-major instead.
+
+# Higher dimensional examples
+```
+julia> A = permutedims(reshape([4 3; 2 1; 'A' 'B'; 'C' 'D'], (2, 2, 2)), (1, 3, 2))
+2×2×2 Array{Any,3}:
+[:, :, 1] =
+ 4  3
+ 2  1
+
+[:, :, 2] =
+ 'A'  'B'
+ 'C'  'D'
+
+julia> sortslices(A, dims=(1,2))
+2×2×2 Array{Any,3}:
+[:, :, 1] =
+ 1  3
+ 2  4
+
+[:, :, 2] =
+ 'D'  'B'
+ 'C'  'A'
+
+julia> sortslices(A, dims=(2,1))
+2×2×2 Array{Any,3}:
+[:, :, 1] =
+ 1  2
+ 3  4
+
+[:, :, 2] =
+ 'D'  'C'
+ 'B'  'A'
+
+julia> sortslices(reshape([5; 4; 3; 2; 1], (1,1,5)), dims=3, by=x->x[1,1])
+1×1×5 Array{Int64,3}:
+[:, :, 1] =
+ 1
+
+[:, :, 2] =
+ 2
+
+[:, :, 3] =
+ 3
+
+[:, :, 4] =
+ 4
+
+[:, :, 5] =
+ 5
+```
 """
-function sortcols(A::AbstractMatrix; kws...)
-    cols = [view(A, :, i) for i in axes(A,2)]
-    p = sortperm(cols; kws...)
-    A[:,p]
+function sortslices(A::AbstractArray; dims, kws...)
+    if isa(dims, Symbol)
+        dims = dims == :rows ? 1 :
+               dims == :cols ? 2 :
+               error("Only :rows or :cols are allowed as symbolic dimensions")
+    end
+    _sortslices(A, Val{dims}(); kws...)
+end
+
+# Works around inference's lack of ability to recognize partial constness
+struct DimSelector{dims, T}
+    A::T
+end
+DimSelector{dims}(x::T) where {dims, T} = DimSelector{dims, T}(x)
+(ds::DimSelector{dims, T})(i) where {dims, T} = i in dims ? axes(ds.A, i) : (:,)
+
+_negdims(n, dims) = filter(i->!(i in dims), 1:n)
+
+function compute_itspace(A, ::Val{dims}) where {dims}
+    negdims = _negdims(ndims(A), dims)
+    axs = Iterators.product(ntuple(DimSelector{dims}(A), ndims(A))...)
+    vec(permutedims(collect(axs), (dims..., negdims...)))
+end
+
+function _sortslices(A::AbstractArray, d::Val{dims}; kws...) where dims
+    itspace = compute_itspace(A, d)
+    vecs = map(its->view(A, its...), itspace)
+    p = sortperm(vecs; kws...)
+    if ndims(A) == 2 && isa(dims, Integer) && isa(A, Array)
+        # Fast path for plain matrices sorted along a single dimension; the
+        # generic version is currently subpar (about 5x slower). `p` is 1-based,
+        # so tuple `dims` and arrays with offset axes must take the generic path.
+        return dims == 1 ? A[p, :] : A[:, p]
+    else
+        B = similar(A)
+        for (x, its) in zip(p, itspace)
+            B[its...] = vecs[x]
+        end
+        B
+    end
 end
 
 ## fast clever sorting for floats ##
diff --git a/doc/src/base/sort.md b/doc/src/base/sort.md
index ffb4725b6bcf6..a3c3b7c3df338 100644
--- a/doc/src/base/sort.md
+++ b/doc/src/base/sort.md
@@ -111,8 +111,7 @@ Base.sort!
 Base.sort
 Base.sortperm
 Base.Sort.sortperm!
-Base.Sort.sortrows
-Base.Sort.sortcols
+Base.Sort.sortslices
 ```
 
 ## Order-Related Functions
diff --git a/test/arrayops.jl b/test/arrayops.jl
index 9306a6e592bf3..88dbe320f6771 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -659,7 +659,7 @@ let A, B, C, D
     # 10 repeats of each row
     B = A[shuffle!(repeat(1:10, 10)), :]
     C = unique(B, dims=1)
-    @test sortrows(C) == sortrows(A)
+    @test sortslices(C, dims=:rows) == sortslices(A, dims=:rows)
     @test unique(B, dims=2) == B
     @test unique(B', dims=2)' == C
 
@@ -1173,11 +1173,11 @@ end
 @testset "sort on arrays" begin
     local a = rand(3,3)
 
-    asr = sortrows(a)
+    asr = sortslices(a, dims=:rows)
     @test isless(asr[1,:],asr[2,:])
     @test isless(asr[2,:],asr[3,:])
 
-    asc = sortcols(a)
+    asc = sortslices(a, dims=:cols)
     @test isless(asc[:,1],asc[:,2])
     @test isless(asc[:,2],asc[:,3])
 
@@ -1187,11 +1187,11 @@ end
     @test m == zeros(3, 4)
     @test o == fill(1, 3, 4)
 
-    asr = sortrows(a, rev=true)
+    asr = sortslices(a, dims=:rows, rev=true)
     @test isless(asr[2,:],asr[1,:])
     @test isless(asr[3,:],asr[2,:])
 
-    asc = sortcols(a, rev=true)
+    asc = sortslices(a, dims=:cols, rev=true)
     @test isless(asc[:,2],asc[:,1])
     @test isless(asc[:,3],asc[:,2])
 
@@ -1223,6 +1223,20 @@ end
     @test all(bs[:,:,1] .<= bs[:,:,2])
 end
 
+@testset "higher dimensional sortslices" begin
+    A = permutedims(reshape([4 3; 2 1; 'A' 'B'; 'C' 'D'], (2, 2, 2)), (1, 3, 2))
+    @test sortslices(A, dims=(1, 2)) ==
+        permutedims(reshape([1 3; 2 4; 'D' 'B'; 'C' 'A'], (2, 2, 2)), (1, 3, 2))
+    @test sortslices(A, dims=(2, 1)) ==
+        permutedims(reshape([1 2; 3 4; 'D' 'C'; 'B' 'A'], (2, 2, 2)), (1, 3, 2))
+    B = reshape(1:8, (2,2,2))
+    @test sortslices(B, dims=(3,1))[:, :, 1] == [
+        1 3;
+        5 7
+    ]
+    @test sortslices(B, dims=(1,3)) == B
+end
+
 @testset "fill" begin
     @test fill!(Float64[1.0], -0.0)[1] === -0.0
     A = fill(1.,3,3)
diff --git a/test/offsetarray.jl b/test/offsetarray.jl
index 2881261d73c86..35c0a8fa385c6 100644
--- a/test/offsetarray.jl
+++ b/test/offsetarray.jl
@@ -434,8 +434,8 @@ amin, amax = extrema(parent(A))
 @test unique(A, dims=2) == OffsetArray(parent(A), first(axes(A, 1)) - 1, 0)
 v = OffsetArray(rand(8), (-2,))
 @test sort(v) == OffsetArray(sort(parent(v)), v.offsets)
-@test sortrows(A) == OffsetArray(sortrows(parent(A)), A.offsets)
-@test sortcols(A) == OffsetArray(sortcols(parent(A)), A.offsets)
+@test sortslices(A, dims=:rows) == OffsetArray(sortslices(parent(A), dims=:rows), A.offsets)
+@test sortslices(A, dims=:cols) == OffsetArray(sortslices(parent(A), dims=:cols), A.offsets)
 @test sort(A, dims=1) == OffsetArray(sort(parent(A), dims=1), A.offsets)
 @test sort(A, dims=2) == OffsetArray(sort(parent(A), dims=2), A.offsets)
 