Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

code layout improvements #2536

Merged
merged 2 commits into from
Nov 13, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -424,8 +424,13 @@ applied to all pairs of columns stored in `df1` and `df2` returns `true`.
function Base.isapprox(df1::AbstractDataFrame, df2::AbstractDataFrame;
                       atol::Real=0, rtol::Real=atol>0 ? 0 : √eps(),
                       nans::Bool=false, norm::Function=norm)
    # Both data frames must have identical dimensions before any comparison.
    if size(df1) != size(df2)
        throw(DimensionMismatch("dimensions must match: a has dims " *
                                "$(size(df1)), b has dims $(size(df2))"))
    end
    # Require matching column names (checked through the column indices).
    if !isequal(index(df1), index(df2))
        throw(ArgumentError("column names of passed data frames do not match"))
    end
    # The data frames are approximately equal only if every pair of
    # corresponding columns is; all keyword arguments are forwarded unchanged.
    return all(isapprox.(eachcol(df1), eachcol(df2),
                         atol=atol, rtol=rtol, nans=nans, norm=norm))
end
##############################################################################
Expand Down Expand Up @@ -911,7 +916,7 @@ julia> dropmissing!(df3, [:x, :y])
function dropmissing!(df::AbstractDataFrame,
cols::Union{ColumnIndex, MultiColumnIndex}=:;
disallowmissing::Bool=true)
inds = completecases(df, cols)
inds = completecases(df, cols)
inds .= .!(inds)
delete!(df, inds)
disallowmissing && disallowmissing!(df, cols)
Expand Down Expand Up @@ -1198,8 +1203,8 @@ nonunique(df, 1)
"""
function nonunique(df::AbstractDataFrame)
if ncol(df) == 0
throw(ArgumentError("finding duplicate rows in data frame with no" *
" columns is not allowed"))
throw(ArgumentError("finding duplicate rows in data frame with no " *
"columns is not allowed"))
end
gslots = row_group_slots(ntuple(i -> df[!, i], ncol(df)), Val(true))[3]
# unique rows are the first encountered group representatives,
Expand Down Expand Up @@ -1853,8 +1858,8 @@ function flatten(df::AbstractDataFrame,
if any(x -> length(x[1]) != x[2], zip(v, lengths))
r = findfirst(x -> x != 0, length.(v) .- lengths)
colnames = _names(df)
throw(ArgumentError("Lengths of iterables stored in columns :$(colnames[col1])" *
" and :$(colnames[col]) are not the same in row $r"))
throw(ArgumentError("Lengths of iterables stored in columns :$(colnames[col1]) " *
"and :$(colnames[col]) are not the same in row $r"))
end
end

Expand Down
4 changes: 2 additions & 2 deletions src/abstractdataframe/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ Base.summary(dfrs::DataFrameRows) = "$(length(dfrs))-element DataFrameRows"
Base.summary(io::IO, dfrs::DataFrameRows) = print(io, summary(dfrs))

# Direct iteration over a data frame is rejected; the error message points
# to the explicit row and column iterators instead.
Base.iterate(::AbstractDataFrame) =
    error("AbstractDataFrame is not iterable. Use eachrow(df) to get a row iterator " *
          "or eachcol(df) to get a column iterator")

"""
eachrow(df::AbstractDataFrame)
Expand Down
20 changes: 10 additions & 10 deletions src/abstractdataframe/join.jl
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,8 @@ function _join(df1::AbstractDataFrame, df2::AbstractDataFrame;

if hasproperty(joined, unique_indicator)
throw(ArgumentError("joined data frame already has column " *
":$unique_indicator. Pass makeunique=true to" *
" make it unique using a suffix automatically."))
":$unique_indicator. Pass makeunique=true to " *
"make it unique using a suffix automatically."))
end
joined[!, unique_indicator] = indicatorcol
else
Expand Down Expand Up @@ -594,8 +594,8 @@ function innerjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
renamecols::Pair=identity => identity,
matchmissing::Symbol=:error)
if !all(x -> x isa Union{Function, AbstractString, Symbol}, renamecols)
throw(ArgumentError("renamecols keyword argument must be a `Pair`" *
" containing functions, strings, or `Symbol`s"))
throw(ArgumentError("renamecols keyword argument must be a `Pair` " *
"containing functions, strings, or `Symbol`s"))
end
return _join(df1, df2, on=on, kind=:inner, makeunique=makeunique,
indicator=nothing, validate=validate,
Expand Down Expand Up @@ -731,8 +731,8 @@ function leftjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
validate::Union{Pair{Bool, Bool}, Tuple{Bool, Bool}}=(false, false),
renamecols::Pair=identity => identity, matchmissing::Symbol=:error)
if !all(x -> x isa Union{Function, AbstractString, Symbol}, renamecols)
throw(ArgumentError("renamecols keyword argument must be a `Pair`" *
" containing functions, strings, or `Symbol`s"))
throw(ArgumentError("renamecols keyword argument must be a `Pair` " *
"containing functions, strings, or `Symbol`s"))
end
return _join(df1, df2, on=on, kind=:left, makeunique=makeunique,
indicator=indicator, validate=validate,
Expand Down Expand Up @@ -860,8 +860,8 @@ function rightjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
validate::Union{Pair{Bool, Bool}, Tuple{Bool, Bool}}=(false, false),
renamecols::Pair=identity => identity, matchmissing::Symbol=:error)
if !all(x -> x isa Union{Function, AbstractString, Symbol}, renamecols)
throw(ArgumentError("renamecols keyword argument must be a `Pair`" *
" containing functions, strings, or `Symbol`s"))
throw(ArgumentError("renamecols keyword argument must be a `Pair` " *
"containing functions, strings, or `Symbol`s"))
end
return _join(df1, df2, on=on, kind=:right, makeunique=makeunique,
indicator=indicator, validate=validate,
Expand Down Expand Up @@ -1000,8 +1000,8 @@ function outerjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
validate::Union{Pair{Bool, Bool}, Tuple{Bool, Bool}}=(false, false),
renamecols::Pair=identity => identity, matchmissing::Symbol=:error)
if !all(x -> x isa Union{Function, AbstractString, Symbol}, renamecols)
throw(ArgumentError("renamecols keyword argument must be a `Pair`" *
" containing functions, strings, or `Symbol`s"))
throw(ArgumentError("renamecols keyword argument must be a `Pair` " *
"containing functions, strings, or `Symbol`s"))
end
return _join(df1, df2, on=on, kind=:outer, makeunique=makeunique,
indicator=indicator, validate=validate,
Expand Down
4 changes: 2 additions & 2 deletions src/abstractdataframe/reshape.jl
Original file line number Diff line number Diff line change
Expand Up @@ -395,8 +395,8 @@ function _unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Int},
colref_map = df[col_group_row_idxs, colkey]

if any(ismissing, colref_map) && !allowmissing
throw(ArgumentError("Missing value in variable :$(_names(df)[colkey])." *
" Pass `allowmissing=true` to skip missings."))
throw(ArgumentError("Missing value in variable :$(_names(df)[colkey]). " *
"Pass `allowmissing=true` to skip missings."))
end

unstacked_val = [similar_missing(valuecol, Nrow) for i in 1:Ncol]
Expand Down
44 changes: 22 additions & 22 deletions src/dataframe/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ struct DataFrame <: AbstractDataFrame
if length(columns) == length(colindex) == 0
return new(AbstractVector[], Index())
elseif length(columns) != length(colindex)
throw(DimensionMismatch("Number of columns ($(length(columns))) and number of" *
" column names ($(length(colindex))) are not equal"))
throw(DimensionMismatch("Number of columns ($(length(columns))) and number of " *
"column names ($(length(colindex))) are not equal"))
end

len = -1
Expand Down Expand Up @@ -198,8 +198,8 @@ struct DataFrame <: AbstractDataFrame
columns[i] = fill!(Tables.allocatecolumn(typeof(x), len), x)
else
if col isa AbstractArray
throw(ArgumentError("adding AbstractArray other than AbstractVector" *
" as a column of a data frame is not allowed"))
throw(ArgumentError("adding AbstractArray other than AbstractVector " *
"as a column of a data frame is not allowed"))
end
columns[i] = fill!(Tables.allocatecolumn(typeof(col), len), col)
end
Expand Down Expand Up @@ -274,8 +274,8 @@ function DataFrame(; kwargs...)
throw(ArgumentError("the `copycols` keyword argument must be Boolean"))
end
elseif kw === :makeunique
throw(ArgumentError("the `makeunique` keyword argument is not allowed" *
" in DataFrame(; kwargs...) constructor"))
throw(ArgumentError("the `makeunique` keyword argument is not allowed " *
"in DataFrame(; kwargs...) constructor"))
else
push!(cnames, kw)
push!(columns, val)
Expand Down Expand Up @@ -549,10 +549,10 @@ Base.setproperty!(df::DataFrame, col_ind::Symbol, v::AbstractVector) =
Base.setproperty!(df::DataFrame, col_ind::AbstractString, v::AbstractVector) =
(df[!, col_ind] = v)
Base.setproperty!(::DataFrame, col_ind::Symbol, v::Any) =
throw(ArgumentError("It is only allowed to pass a vector as a column of a DataFrame." *
throw(ArgumentError("It is only allowed to pass a vector as a column of a DataFrame. " *
"Instead use `df[!, col_ind] .= v` if you want to use broadcasting."))
Base.setproperty!(::DataFrame, col_ind::AbstractString, v::Any) =
throw(ArgumentError("It is only allowed to pass a vector as a column of a DataFrame." *
throw(ArgumentError("It is only allowed to pass a vector as a column of a DataFrame. " *
"Instead use `df[!, col_ind] .= v` if you want to use broadcasting."))

# df[SingleRowIndex, SingleColumnIndex] = Single Item
Expand All @@ -572,8 +572,8 @@ for T in MULTICOLUMNINDEX_TUPLE
col_inds::$T)
idxs = index(df)[col_inds]
if length(v) != length(idxs)
throw(DimensionMismatch("$(length(idxs)) columns were selected but the assigned" *
" collection contains $(length(v)) elements"))
throw(DimensionMismatch("$(length(idxs)) columns were selected but the assigned " *
"collection contains $(length(v)) elements"))
end
for (i, x) in zip(idxs, v)
df[row_ind, i] = x
Expand Down Expand Up @@ -642,9 +642,9 @@ for T1 in (:AbstractVector, :Not, :Colon, :(typeof(!))),
col_inds::$T2)
idxs = index(df)[col_inds]
if size(mx, 2) != length(idxs)
throw(DimensionMismatch("number of selected columns ($(length(idxs)))" *
" and number of columns in" *
" matrix ($(size(mx, 2))) do not match"))
throw(DimensionMismatch("number of selected columns ($(length(idxs))) " *
"and number of columns in " *
"matrix ($(size(mx, 2))) do not match"))
end
for (j, col) in enumerate(idxs)
df[row_inds, col] = (row_inds === !) ? mx[:, j] : view(mx, :, j)
Expand Down Expand Up @@ -825,8 +825,8 @@ function insertcols!(df::DataFrame, col::Int=ncol(df)+1; makeunique::Bool=false,
end
if !isempty(name_cols)
# an explicit error is thrown as keyword argument was supported in the past
throw(ArgumentError("inserting colums using a keyword argument is not supported," *
" pass a Pair as a positional argument instead"))
throw(ArgumentError("inserting colums using a keyword argument is not supported, " *
"pass a Pair as a positional argument instead"))
end
return df
end
Expand Down Expand Up @@ -1317,15 +1317,15 @@ function Base.push!(df::DataFrame, row::Union{AbstractDict, NamedTuple};
"is not allowed as it is unordered"))
elseif length(row) != ncol(df) || any(x -> x[1] != x[2], zip(keys(row), _names(df)))
throw(ArgumentError("when `cols == :orderequal` pushed row must " *
"have the same column names and in the" *
" same order as the target data frame"))
"have the same column names and in the " *
"same order as the target data frame"))
end
elseif cols === :setequal
# Only check for equal lengths if :setequal is selected,
# as an error will be thrown below if some names don't match
if length(row) != ncols
# an explicit error is thrown as this was allowed in the past
throw(ArgumentError("`push!` with `cols` equal to `:setequal`" *
throw(ArgumentError("`push!` with `cols` equal to `:setequal` " *
"requires `row` to have the same number of elements " *
"as the number of columns in `df`."))
end
Expand Down Expand Up @@ -1479,10 +1479,10 @@ julia> push!(df, NamedTuple(), cols=:subset)
function Base.push!(df::DataFrame, row::Any; promote::Bool=false)
if !(row isa Union{Tuple, AbstractArray})
# an explicit error is thrown as this was allowed in the past
throw(ArgumentError("`push!` does not allow passing collections of type" *
" $(typeof(row)) to be pushed into a DataFrame. Only" *
" `Tuple`, `AbstractArray`, `AbstractDict`, `DataFrameRow`" *
" and `NamedTuple` are allowed."))
throw(ArgumentError("`push!` does not allow passing collections of type " *
"$(typeof(row)) to be pushed into a DataFrame. Only " *
"`Tuple`, `AbstractArray`, `AbstractDict`, `DataFrameRow` " *
"and `NamedTuple` are allowed."))
end
nrows, ncols = size(df)
targetrows = nrows + 1
Expand Down
8 changes: 4 additions & 4 deletions src/dataframerow/dataframerow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -231,8 +231,8 @@ for T in MULTICOLUMNINDEX_TUPLE
col_inds::$(T))
idxs = index(df)[col_inds]
if length(v) != length(idxs)
throw(DimensionMismatch("$(length(idxs)) columns were selected but the assigned" *
" collection contains $(length(v)) elements"))
throw(DimensionMismatch("$(length(idxs)) columns were selected but the assigned " *
"collection contains $(length(v)) elements"))
end

if v isa AbstractDict
Expand Down Expand Up @@ -475,8 +475,8 @@ function Base.isless(r1::DataFrameRow, r2::DataFrameRow)
if _names(r1) != _names(r2)
mismatch = findfirst(i -> _names(r1)[i] != _names(r2)[i], 1:length(r1))
throw(ArgumentError("compared DataFrameRows must have the same colum " *
"names but they differ in column number $mismatch" *
" where the names are :$(names(r1)[mismatch]) and " *
"names but they differ in column number $mismatch " *
"where the names are :$(names(r1)[mismatch]) and " *
":$(_names(r2)[mismatch]) respectively"))
end
for (a,b) in zip(r1, r2)
Expand Down
8 changes: 4 additions & 4 deletions src/groupeddataframe/splitapplycombine.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ function _combine_prepare(gd::GroupedDataFrame,
if any(x -> x isa Pair && first(x) isa Tuple, cs_vec)
x = cs_vec[findfirst(x -> first(x) isa Tuple, cs_vec)]
# an explicit error is thrown as this was allowed in the past
throw(ArgumentError("passing a Tuple $(first(x)) as column selector is not supported" *
", use a vector $(collect(first(x))) instead"))
throw(ArgumentError("passing a Tuple $(first(x)) as column selector is not " *
"supported, use a vector $(collect(first(x))) instead"))
end

cs_norm = []
Expand Down Expand Up @@ -226,8 +226,8 @@ function _combine_process_noop(cs_i::Pair{<:Union{Int, AbstractVector{Int}}, Pai
source_cols = first(cs_i)
out_col_name = last(last(cs_i))
if length(source_cols) != 1
throw(ArgumentError("Exactly one column can be transformed to one output column" *
" when using identity transformation"))
throw(ArgumentError("Exactly one column can be transformed to one output column " *
"when using identity transformation"))
end
outcol = parentdf[!, first(source_cols)]

Expand Down
16 changes: 8 additions & 8 deletions src/other/index.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ const MultiColumnIndex = Union{AbstractVector, Regex, Not, Between, All, Cols, C
# Names of the multi-column selector types as symbols; other code loops over
# this tuple (`for T in MULTICOLUMNINDEX_TUPLE`) to generate one method per type.
const MULTICOLUMNINDEX_TUPLE = (:AbstractVector, :Regex, :Not, :Between, :All, :Cols, :Colon)

# Human-readable descriptions of the accepted single- and multi-column
# selectors (presumably interpolated into messages and docs; verify at call sites).
const COLUMNINDEX_STR = "`Symbol`, string or integer"
const MULTICOLUMNINDEX_STR = "`:`, `Cols`, `All`, `Between`, `Not`, a regular expression, " *
                             "or a vector of `Symbol`s, strings or integers"
bkamins marked this conversation as resolved.
Show resolved Hide resolved

struct Index <: AbstractIndex # an OrderedDict would be nice here...
lookup::Dict{Symbol, Int} # name => names array position
Expand Down Expand Up @@ -46,8 +46,8 @@ function rename!(x::Index, nms::AbstractVector{Symbol}; makeunique::Bool=false)
if length(unique(nms)) != length(nms)
dup = unique(nms[nonunique(DataFrame(nms=nms))])
dupstr = join(string.(':', dup), ", ", " and ")
msg = "Duplicate variable names: $dupstr. Pass makeunique=true" *
" to make them unique using a suffix automatically."
msg = "Duplicate variable names: $dupstr. Pass makeunique=true " *
"to make them unique using a suffix automatically."
throw(ArgumentError(msg))
end
end
Expand Down Expand Up @@ -224,8 +224,8 @@ end

@inline function Base.getindex(x::AbstractIndex, idx::AbstractVector{<:Integer})
if any(v -> v isa Bool, idx)
throw(ArgumentError("Bool values except for AbstractVector{Bool} are not" *
" allowed for column indexing"))
throw(ArgumentError("Bool values except for AbstractVector{Bool} are not " *
"allowed for column indexing"))
end
return getindex(x, Vector{Int}(idx))
end
Expand Down Expand Up @@ -318,8 +318,8 @@ function add_names(ind::Index, add_ind::AbstractIndex; makeunique::Bool=false)
if length(dups) > 0
if !makeunique
dupstr = join(string.(':', unique(u[dups])), ", ", " and ")
msg = "Duplicate variable names: $dupstr. Pass makeunique=true" *
" to make them unique using a suffix automatically."
msg = "Duplicate variable names: $dupstr. Pass makeunique=true " *
"to make them unique using a suffix automatically."
throw(ArgumentError(msg))
end
end
Expand Down
Loading