Skip to content

Commit

Permalink
[BREAKING] remove CategoricalArrays dependency from joins (#2505)
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored Nov 1, 2020
1 parent b76c04f commit 540f901
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 7 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
choose the fast path only when it is safe; this resolves inconsistencies
with what the same functions not using fast path produce
([#2357](https://github.com/JuliaData/DataFrames.jl/pull/2357))
* joins now return a `PooledVector` instead of a `CategoricalVector` in the indicator column
([#2505](https://github.com/JuliaData/DataFrames.jl/pull/2505))
* `GroupKeys` now supports `in` for `GroupKey`, `Tuple`, `NamedTuple` and dictionaries
([#2392](https://github.com/JuliaData/DataFrames.jl/pull/2392))
* in `describe` the specification of custom aggregation is now `function => name`;
Expand Down
18 changes: 11 additions & 7 deletions src/abstractdataframe/join.jl
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,10 @@ function compose_joined_table(joiner::DataFrameJoiner, kind::Symbol,
# about the permutation of left data frame in rightjoin as we always
# assign 0x1 to it anyway and these rows are guaranteed to come first
# (even if they are permuted)
left_indicator = zeros(UInt8, nrow)
left_indicator[axes(all_orig_left_ixs, 1)] .= 0x1
right_indicator = zeros(UInt8, nrow)
right_indicator[axes(all_orig_right_ixs, 1)] .= 0x2
left_indicator = zeros(UInt32, nrow)
left_indicator[axes(all_orig_left_ixs, 1)] .= 1
right_indicator = zeros(UInt32, nrow)
right_indicator[axes(all_orig_right_ixs, 1)] .= 2
permute!(right_indicator, right_perm)
end

Expand Down Expand Up @@ -416,9 +416,13 @@ function _join(df1::AbstractDataFrame, df2::AbstractDataFrame;
end

if indicator !== nothing
refs = left_indicator + right_indicator
pool = CategoricalPool{String,UInt8}(["left_only", "right_only", "both"])
indicatorcol = CategoricalArray{String,1}(refs, pool)
left_indicator .+= right_indicator
pool = ["left_only", "right_only", "both"]
invpool = Dict{String, UInt32}("left_only" => 1,
"right_only" => 2,
"both" => 3)
indicatorcol = PooledArray(PooledArrays.RefArray(left_indicator),
invpool, pool)

unique_indicator = indicator
if makeunique
Expand Down

0 comments on commit 540f901

Please sign in to comment.