From be6158b06df9d1f9d0570c014e1f865a0a7239cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 8 Sep 2021 08:41:12 +0200 Subject: [PATCH 1/9] update docs following CSV.jl 0.9 release --- docs/src/man/basics.md | 788 ++++++++++++------------- docs/src/man/reshaping_and_pivoting.md | 22 +- docs/src/man/sorting.md | 14 +- docs/src/man/split_apply_combine.md | 30 +- 4 files changed, 427 insertions(+), 427 deletions(-) diff --git a/docs/src/man/basics.md b/docs/src/man/basics.md index 1727b5267d..59fd54c419 100644 --- a/docs/src/man/basics.md +++ b/docs/src/man/basics.md @@ -226,25 +226,25 @@ julia> german_ref = CSV.read(joinpath(dirname(pathof(DataFrames)), "..", "docs", "src", "assets", "german.csv"), DataFrame) 1000×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking accoun ⋯ - │ Int64 Int64 String Int64 String String String ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking a ⋯ + │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStri ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own NA little ⋯ - 2 │ 1 22 female 2 own little moderate - 3 │ 2 49 male 1 own little NA - 4 │ 3 45 male 2 free little little - 5 │ 4 53 male 2 free little little ⋯ - 6 │ 5 35 male 1 free NA NA - 7 │ 6 53 male 2 own quite rich NA - 8 │ 7 35 male 3 rent little moderate - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 994 │ 993 30 male 3 own little little ⋯ - 995 │ 994 50 male 2 own NA NA - 996 │ 995 31 female 1 own little NA - 997 │ 996 40 male 3 own little little - 998 │ 997 38 male 2 own little NA ⋯ - 999 │ 998 23 male 2 free little little - 1000 │ 999 27 male 2 own moderate moderate + 1 │ 0 67 male 2 own NA little ⋯ + 2 │ 1 22 female 2 own little moderate + 3 │ 2 49 male 1 own little NA + 4 │ 3 45 male 2 free little little + 5 │ 4 53 male 2 free little little ⋯ + 6 │ 5 35 male 1 free NA NA + 7 │ 6 53 male 2 own quite rich NA + 8 │ 7 35 male 3 rent little moderate + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 994 │ 
993 30 male 3 own little little ⋯ + 995 │ 994 50 male 2 own NA NA + 996 │ 995 31 female 1 own little NA + 997 │ 996 40 male 3 own little little + 998 │ 997 38 male 2 own little NA ⋯ + 999 │ 998 23 male 2 free little little + 1000 │ 999 27 male 2 own moderate moderate 4 columns and 985 rows omitted ``` @@ -295,7 +295,7 @@ as in the case of the syntax using a `.`. ```jldoctest dataframe julia> german.Sex -1000-element PooledArrays.PooledVector{String, UInt32, Vector{UInt32}}: +1000-element PooledArrays.PooledVector{WeakRefStrings.InlineString7, UInt32, Vector{UInt32}}: "male" "female" "male" @@ -321,7 +321,7 @@ julia> colname = "Sex" "Sex" julia> german[!, colname] -1000-element PooledArrays.PooledVector{String, UInt32, Vector{UInt32}}: +1000-element PooledArrays.PooledVector{WeakRefStrings.InlineString7, UInt32, Vector{UInt32}}: "male" "female" "male" @@ -383,7 +383,7 @@ For example you can get column names with a given element type by passing this type as a second argument to the `names` function: ```jldoctest dataframe -julia> names(german, String) +julia> names(german, AbstractString) 5-element Vector{String}: "Sex" "Housing" @@ -424,14 +424,14 @@ julia> eltype.(eachcol(german)) 10-element Vector{DataType}: Int64 Int64 - String + WeakRefStrings.InlineString7 Int64 - String - String - String + WeakRefStrings.InlineString7 + WeakRefStrings.InlineString15 + WeakRefStrings.InlineString15 Int64 Int64 - String + WeakRefStrings.InlineString31 ``` !!! 
note @@ -455,25 +455,25 @@ julia> empty(german) julia> german 1000×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking accoun ⋯ - │ Int64 Int64 String Int64 String String String ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking a ⋯ + │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStri ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own NA little ⋯ - 2 │ 1 22 female 2 own little moderate - 3 │ 2 49 male 1 own little NA - 4 │ 3 45 male 2 free little little - 5 │ 4 53 male 2 free little little ⋯ - 6 │ 5 35 male 1 free NA NA - 7 │ 6 53 male 2 own quite rich NA - 8 │ 7 35 male 3 rent little moderate - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 994 │ 993 30 male 3 own little little ⋯ - 995 │ 994 50 male 2 own NA NA - 996 │ 995 31 female 1 own little NA - 997 │ 996 40 male 3 own little little - 998 │ 997 38 male 2 own little NA ⋯ - 999 │ 998 23 male 2 free little little - 1000 │ 999 27 male 2 own moderate moderate + 1 │ 0 67 male 2 own NA little ⋯ + 2 │ 1 22 female 2 own little moderate + 3 │ 2 49 male 1 own little NA + 4 │ 3 45 male 2 free little little + 5 │ 4 53 male 2 free little little ⋯ + 6 │ 5 35 male 1 free NA NA + 7 │ 6 53 male 2 own quite rich NA + 8 │ 7 35 male 3 rent little moderate + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 994 │ 993 30 male 3 own little little ⋯ + 995 │ 994 50 male 2 own NA NA + 996 │ 995 31 female 1 own little NA + 997 │ 996 40 male 3 own little little + 998 │ 997 38 male 2 own little NA ⋯ + 999 │ 998 23 male 2 free little little + 1000 │ 999 27 male 2 own moderate moderate 4 columns and 985 rows omitted julia> empty!(german) @@ -552,10 +552,10 @@ julia> describe(german, cols=1:3) 3×7 DataFrame Row │ variable mean min median max nmissing eltype │ Symbol Union… Any Union… Any Int64 DataType -─────┼──────────────────────────────────────────────────────────── +─────┼───────────────────────────────────────────────────────────────── 1 │ id 499.5 0 499.5 999 0 Int64 2 │ Age 35.546 19 33.0 75 0 
Int64 - 3 │ Sex female male 0 String + 3 │ Sex female male 0 InlineString7 ``` The default statistics reported are mean, min, median, max, number of missing values, and element type of @@ -568,26 +568,26 @@ You can adjust how data frame is displayed by calling the `show` function manual ```jldoctest dataframe julia> show(german, allcols=true) 1000×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking account Credit amount Duration Purpose - │ Int64 Int64 String Int64 String String String Int64 Int64 String -──────┼─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own NA little 1169 6 radio/TV - 2 │ 1 22 female 2 own little moderate 5951 48 radio/TV - 3 │ 2 49 male 1 own little NA 2096 12 education - 4 │ 3 45 male 2 free little little 7882 42 furniture/equipment - 5 │ 4 53 male 2 free little little 4870 24 car - 6 │ 5 35 male 1 free NA NA 9055 36 education - 7 │ 6 53 male 2 own quite rich NA 2835 24 furniture/equipment - 8 │ 7 35 male 3 rent little moderate 6948 36 car - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ - 994 │ 993 30 male 3 own little little 3959 36 furniture/equipment - 995 │ 994 50 male 2 own NA NA 2390 12 car - 996 │ 995 31 female 1 own little NA 1736 12 furniture/equipment - 997 │ 996 40 male 3 own little little 3857 30 car - 998 │ 997 38 male 2 own little NA 804 12 radio/TV - 999 │ 998 23 male 2 free little little 1845 45 radio/TV - 1000 │ 999 27 male 2 own moderate moderate 4576 45 car - 985 rows omitted + Row │ id Age Sex Job Housing Saving accounts Checking account Credit amount Duration Purpose + │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineString15… Int64 Int64 InlineSt… +──────┼──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + 1 │ 0 67 male 2 own NA little 1169 6 radio/TV + 2 │ 1 22 female 2 own little moderate 5951 48 radio/TV + 3 │ 2 49 male 1 own little NA 2096 12 
education + 4 │ 3 45 male 2 free little little 7882 42 furniture/equipment + 5 │ 4 53 male 2 free little little 4870 24 car + 6 │ 5 35 male 1 free NA NA 9055 36 education + 7 │ 6 53 male 2 own quite rich NA 2835 24 furniture/equipment + 8 │ 7 35 male 3 rent little moderate 6948 36 car + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ + 994 │ 993 30 male 3 own little little 3959 36 furniture/equipment + 995 │ 994 50 male 2 own NA NA 2390 12 car + 996 │ 995 31 female 1 own little NA 1736 12 furniture/equipment + 997 │ 996 40 male 3 own little little 3857 30 car + 998 │ 997 38 male 2 own little NA 804 12 radio/TV + 999 │ 998 23 male 2 free little little 1845 45 radio/TV + 1000 │ 999 27 male 2 own moderate moderate 4576 45 car + 985 rows omitted ``` It is easy to compute descriptive statistics directly on individual columns using @@ -637,28 +637,28 @@ using the `first` and `last` functions respectively: ```jldoctest dataframe julia> first(german, 6) 6×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking account ⋯ - │ Int64 Int64 String Int64 String String String ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking ac ⋯ + │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStrin ⋯ ─────┼────────────────────────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own NA little ⋯ - 2 │ 1 22 female 2 own little moderate - 3 │ 2 49 male 1 own little NA - 4 │ 3 45 male 2 free little little - 5 │ 4 53 male 2 free little little ⋯ - 6 │ 5 35 male 1 free NA NA - 3 columns omitted + 1 │ 0 67 male 2 own NA little ⋯ + 2 │ 1 22 female 2 own little moderate + 3 │ 2 49 male 1 own little NA + 4 │ 3 45 male 2 free little little + 5 │ 4 53 male 2 free little little ⋯ + 6 │ 5 35 male 1 free NA NA + 4 columns omitted julia> last(german, 5) 5×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking account ⋯ - │ Int64 Int64 String Int64 String String String ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking ac ⋯ + │ Int64 Int64 InlineSt… Int64 
InlineSt… InlineString15… InlineStrin ⋯ ─────┼────────────────────────────────────────────────────────────────────────── - 1 │ 995 31 female 1 own little NA ⋯ - 2 │ 996 40 male 3 own little little - 3 │ 997 38 male 2 own little NA - 4 │ 998 23 male 2 free little little - 5 │ 999 27 male 2 own moderate moderate ⋯ - 3 columns omitted + 1 │ 995 31 female 1 own little NA ⋯ + 2 │ 996 40 male 3 own little little + 3 │ 997 38 male 2 own little NA + 4 │ 998 23 male 2 free little little + 5 │ 999 27 male 2 own moderate moderate ⋯ + 4 columns omitted ``` Using `first` and `last` without passing the number of rows will return a first/last @@ -671,18 +671,18 @@ i.e. allows to update the source data frame, which is often useful. ```jldoctest dataframe julia> first(german) DataFrameRow - Row │ id Age Sex Job Housing Saving accounts Checking account ⋯ - │ Int64 Int64 String Int64 String String String ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking ac ⋯ + │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStrin ⋯ ─────┼────────────────────────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own NA little ⋯ - 3 columns omitted + 1 │ 0 67 male 2 own NA little ⋯ + 4 columns omitted julia> last(german) DataFrameRow - Row │ id Age Sex Job Housing Saving accounts Checking accoun ⋯ - │ Int64 Int64 String Int64 String String String ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking a ⋯ + │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStri ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1000 │ 999 27 male 2 own moderate moderate ⋯ + 1000 │ 999 27 male 2 own moderate moderate ⋯ 4 columns omitted ``` @@ -702,42 +702,42 @@ Here are a few examples: ```jldoctest dataframe julia> german[1:5, [:Sex, :Age]] 5×2 DataFrame - Row │ Sex Age - │ String Int64 -─────┼─────────────── - 1 │ male 67 - 2 │ female 22 - 3 │ male 49 - 4 │ male 45 - 5 │ male 53 + Row │ Sex Age + │ InlineSt… Int64 
+─────┼────────────────── + 1 │ male 67 + 2 │ female 22 + 3 │ male 49 + 4 │ male 45 + 5 │ male 53 julia> german[1:5, :] 5×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking account ⋯ - │ Int64 Int64 String Int64 String String String ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking ac ⋯ + │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStrin ⋯ ─────┼────────────────────────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own NA little ⋯ - 2 │ 1 22 female 2 own little moderate - 3 │ 2 49 male 1 own little NA - 4 │ 3 45 male 2 free little little - 5 │ 4 53 male 2 free little little ⋯ - 3 columns omitted + 1 │ 0 67 male 2 own NA little ⋯ + 2 │ 1 22 female 2 own little moderate + 3 │ 2 49 male 1 own little NA + 4 │ 3 45 male 2 free little little + 5 │ 4 53 male 2 free little little ⋯ + 4 columns omitted julia> german[[1, 6, 15], :] 3×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking account ⋯ - │ Int64 Int64 String Int64 String String String ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking ac ⋯ + │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStrin ⋯ ─────┼────────────────────────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own NA little ⋯ - 2 │ 5 35 male 1 free NA NA - 3 │ 14 28 female 2 rent little little - 3 columns omitted + 1 │ 0 67 male 2 own NA little ⋯ + 2 │ 5 35 male 1 free NA NA + 3 │ 14 28 female 2 rent little little + 4 columns omitted julia> german[:, [:Age, :Sex]] 1000×2 DataFrame Row │ Age Sex - │ Int64 String -──────┼─────────────── + │ Int64 InlineSt… +──────┼────────────────── 1 │ 67 male 2 │ 22 female 3 │ 49 male @@ -746,7 +746,7 @@ julia> german[:, [:Age, :Sex]] 6 │ 35 male 7 │ 53 male 8 │ 35 male - ⋮ │ ⋮ ⋮ + ⋮ │ ⋮ ⋮ 994 │ 30 male 995 │ 50 male 996 │ 31 female @@ -754,7 +754,7 @@ julia> german[:, [:Age, :Sex]] 998 │ 38 male 999 │ 23 male 1000 │ 27 male - 985 rows omitted + 985 rows omitted ``` Pay attention that `german[!, 
[:Sex]]` and `german[:, [:Sex]]` returns a data frame object, @@ -769,8 +769,8 @@ Below we show both operations to highlight this difference: julia> german[!, [:Sex]] 1000×1 DataFrame Row │ Sex - │ String -──────┼──────── + │ InlineSt… +──────┼─────────── 1 │ male 2 │ female 3 │ male @@ -779,7 +779,7 @@ julia> german[!, [:Sex]] 6 │ male 7 │ male 8 │ male - ⋮ │ ⋮ + ⋮ │ ⋮ 994 │ male 995 │ male 996 │ female @@ -787,10 +787,10 @@ julia> german[!, [:Sex]] 998 │ male 999 │ male 1000 │ male -985 rows omitted + 985 rows omitted julia> german[!, :Sex] -1000-element PooledArrays.PooledVector{String, UInt32, Vector{UInt32}}: +1000-element PooledArrays.PooledVector{WeakRefStrings.InlineString7, UInt32, Vector{UInt32}}: "male" "female" "male" @@ -848,26 +848,26 @@ efficient than creating a materialized selection. You can create it using a `vie ```jldoctest dataframe julia> view(german, :, 2:5) 1000×4 SubDataFrame - Row │ Age Sex Job Housing - │ Int64 String Int64 String -──────┼─────────────────────────────── - 1 │ 67 male 2 own - 2 │ 22 female 2 own - 3 │ 49 male 1 own - 4 │ 45 male 2 free - 5 │ 53 male 2 free - 6 │ 35 male 1 free - 7 │ 53 male 2 own - 8 │ 35 male 3 rent - ⋮ │ ⋮ ⋮ ⋮ ⋮ - 994 │ 30 male 3 own - 995 │ 50 male 2 own - 996 │ 31 female 1 own - 997 │ 40 male 3 own - 998 │ 38 male 2 own - 999 │ 23 male 2 free - 1000 │ 27 male 2 own - 985 rows omitted + Row │ Age Sex Job Housing + │ Int64 InlineSt… Int64 InlineSt… +──────┼──────────────────────────────────── + 1 │ 67 male 2 own + 2 │ 22 female 2 own + 3 │ 49 male 1 own + 4 │ 45 male 2 free + 5 │ 53 male 2 free + 6 │ 35 male 1 free + 7 │ 53 male 2 own + 8 │ 35 male 3 rent + ⋮ │ ⋮ ⋮ ⋮ ⋮ + 994 │ 30 male 3 own + 995 │ 50 male 2 own + 996 │ 31 female 1 own + 997 │ 40 male 3 own + 998 │ 38 male 2 own + 999 │ 23 male 2 free + 1000 │ 27 male 2 own + 985 rows omitted ``` or using a `@view` macro: @@ -922,10 +922,10 @@ or a single row: ```jldoctest dataframe julia> @view german[3, 2:5] DataFrameRow - Row │ Age Sex Job Housing - │ 
Int64 String Int64 String -─────┼─────────────────────────────── - 3 │ 49 male 1 own + Row │ Age Sex Job Housing + │ Int64 InlineSt… Int64 InlineSt… +─────┼──────────────────────────────────── + 3 │ 49 male 1 own ``` As you can see the row and column indexing syntax is exactly the same as for indexing. @@ -960,15 +960,15 @@ In order to show how to perform mutating operations on a data frame we make a su ```jldoctest dataframe julia> df1 = german[1:6, 2:4] 6×3 DataFrame - Row │ Age Sex Job - │ Int64 String Int64 -─────┼────────────────────── - 1 │ 67 male 2 - 2 │ 22 female 2 - 3 │ 49 male 1 - 4 │ 45 male 2 - 5 │ 53 male 2 - 6 │ 35 male 1 + Row │ Age Sex Job + │ Int64 InlineSt… Int64 +─────┼───────────────────────── + 1 │ 67 male 2 + 2 │ 22 female 2 + 3 │ 49 male 1 + 4 │ 45 male 2 + 5 │ 53 male 2 + 6 │ 35 male 1 ``` In the following example we replace the column `:Age` in our `df1` data frame @@ -995,15 +995,15 @@ julia> df1.Age = val julia> df1 6×3 DataFrame - Row │ Age Sex Job - │ Int64 String Int64 -─────┼────────────────────── - 1 │ 80 male 2 - 2 │ 85 female 2 - 3 │ 98 male 1 - 4 │ 95 male 2 - 5 │ 78 male 2 - 6 │ 89 male 1 + Row │ Age Sex Job + │ Int64 InlineSt… Int64 +─────┼───────────────────────── + 1 │ 80 male 2 + 2 │ 85 female 2 + 3 │ 98 male 1 + 4 │ 95 male 2 + 5 │ 78 male 2 + 6 │ 89 male 1 ``` This is a non-copying operation. 
One can perform it only if `val` vector has the same length as number @@ -1027,15 +1027,15 @@ julia> df1[1:3, :Job] = [2, 3, 2] julia> df1 6×3 DataFrame - Row │ Age Sex Job - │ Int64 String Int64 -─────┼────────────────────── - 1 │ 80 male 2 - 2 │ 85 female 3 - 3 │ 98 male 2 - 4 │ 95 male 2 - 5 │ 78 male 2 - 6 │ 89 male 1 + Row │ Age Sex Job + │ Int64 InlineSt… Int64 +─────┼───────────────────────── + 1 │ 80 male 2 + 2 │ 85 female 3 + 3 │ 98 male 2 + 4 │ 95 male 2 + 5 │ 78 male 2 + 6 │ 89 male 1 ``` As a special rule using `!` as row selector replaces column without copying @@ -1395,25 +1395,25 @@ Drop `:Age` column: ```jldoctest dataframe julia> german[:, Not(:Age)] 1000×9 DataFrame - Row │ id Sex Job Housing Saving accounts Checking account Cred ⋯ - │ Int64 String Int64 String String String Int6 ⋯ + Row │ id Sex Job Housing Saving accounts Checking account ⋯ + │ Int64 InlineSt… Int64 InlineSt… InlineString15… InlineString15… ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1 │ 0 male 2 own NA little ⋯ - 2 │ 1 female 2 own little moderate - 3 │ 2 male 1 own little NA - 4 │ 3 male 2 free little little - 5 │ 4 male 2 free little little ⋯ - 6 │ 5 male 1 free NA NA - 7 │ 6 male 2 own quite rich NA - 8 │ 7 male 3 rent little moderate - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 994 │ 993 male 3 own little little ⋯ - 995 │ 994 male 2 own NA NA - 996 │ 995 female 1 own little NA - 997 │ 996 male 3 own little little - 998 │ 997 male 2 own little NA ⋯ - 999 │ 998 male 2 free little little - 1000 │ 999 male 2 own moderate moderate + 1 │ 0 male 2 own NA little ⋯ + 2 │ 1 female 2 own little moderate + 3 │ 2 male 1 own little NA + 4 │ 3 male 2 free little little + 5 │ 4 male 2 free little little ⋯ + 6 │ 5 male 1 free NA NA + 7 │ 6 male 2 own quite rich NA + 8 │ 7 male 3 rent little moderate + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 994 │ 993 male 3 own little little ⋯ + 995 │ 994 male 2 own NA NA + 996 │ 995 female 1 own little NA + 997 │ 996 male 3 own little little + 998 │ 997 male 
2 own little NA ⋯ + 999 │ 998 male 2 free little little + 1000 │ 999 male 2 own moderate moderate 3 columns and 985 rows omitted ``` @@ -1450,26 +1450,26 @@ selectors passed as its arguments: ```jldoctest dataframe julia> german[:, Cols("Age", Between("Sex", "Job"))] 1000×3 DataFrame - Row │ Age Sex Job - │ Int64 String Int64 -──────┼────────────────────── - 1 │ 67 male 2 - 2 │ 22 female 2 - 3 │ 49 male 1 - 4 │ 45 male 2 - 5 │ 53 male 2 - 6 │ 35 male 1 - 7 │ 53 male 2 - 8 │ 35 male 3 - ⋮ │ ⋮ ⋮ ⋮ - 994 │ 30 male 3 - 995 │ 50 male 2 - 996 │ 31 female 1 - 997 │ 40 male 3 - 998 │ 38 male 2 - 999 │ 23 male 2 - 1000 │ 27 male 2 - 985 rows omitted + Row │ Age Sex Job + │ Int64 InlineSt… Int64 +──────┼───────────────────────── + 1 │ 67 male 2 + 2 │ 22 female 2 + 3 │ 49 male 1 + 4 │ 45 male 2 + 5 │ 53 male 2 + 6 │ 35 male 1 + 7 │ 53 male 2 + 8 │ 35 male 3 + ⋮ │ ⋮ ⋮ ⋮ + 994 │ 30 male 3 + 995 │ 50 male 2 + 996 │ 31 female 1 + 997 │ 40 male 3 + 998 │ 38 male 2 + 999 │ 23 male 2 + 1000 │ 27 male 2 + 985 rows omitted ``` You can also use `Regex` (regular expressions) to select columns. 
In the example @@ -1478,26 +1478,26 @@ below we select columns that have `"S"` in their name and also we use `Not` to d ```jldoctest dataframe julia> german[Not(5), r"S"] 999×2 DataFrame - Row │ Sex Saving accounts - │ String String -─────┼───────────────────────── - 1 │ male NA - 2 │ female little - 3 │ male little - 4 │ male little - 5 │ male NA - 6 │ male quite rich - 7 │ male little - 8 │ male rich - ⋮ │ ⋮ ⋮ - 993 │ male little - 994 │ male NA - 995 │ female little - 996 │ male little - 997 │ male little - 998 │ male little - 999 │ male moderate - 984 rows omitted + Row │ Sex Saving accounts + │ InlineSt… InlineString15… +─────┼──────────────────────────── + 1 │ male NA + 2 │ female little + 3 │ male little + 4 │ male little + 5 │ male NA + 6 │ male quite rich + 7 │ male little + 8 │ male rich + ⋮ │ ⋮ ⋮ + 993 │ male little + 994 │ male NA + 995 │ female little + 996 │ male little + 997 │ male little + 998 │ male little + 999 │ male moderate + 984 rows omitted ``` ## Basic Usage of Transformation Functions @@ -1581,8 +1581,8 @@ exactly like in `select`. 
Here is an example: julia> combine(german, :Age => mean => :mean_age, :Housing => unique => :housing) 3×2 DataFrame Row │ mean_age housing - │ Float64 String -─────┼─────────────────── + │ Float64 InlineSt… +─────┼───────────────────── 1 │ 35.546 own 2 │ 35.546 free 3 │ 35.546 rent @@ -1689,25 +1689,25 @@ the resulting data frame: ```jldoctest dataframe julia> select(german, Not(:Age)) 1000×9 DataFrame - Row │ id Sex Job Housing Saving accounts Checking account Cred ⋯ - │ Int64 String Int64 String String String Int6 ⋯ + Row │ id Sex Job Housing Saving accounts Checking account ⋯ + │ Int64 InlineSt… Int64 InlineSt… InlineString15… InlineString15… ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1 │ 0 male 2 own NA little ⋯ - 2 │ 1 female 2 own little moderate - 3 │ 2 male 1 own little NA - 4 │ 3 male 2 free little little - 5 │ 4 male 2 free little little ⋯ - 6 │ 5 male 1 free NA NA - 7 │ 6 male 2 own quite rich NA - 8 │ 7 male 3 rent little moderate - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 994 │ 993 male 3 own little little ⋯ - 995 │ 994 male 2 own NA NA - 996 │ 995 female 1 own little NA - 997 │ 996 male 3 own little little - 998 │ 997 male 2 own little NA ⋯ - 999 │ 998 male 2 free little little - 1000 │ 999 male 2 own moderate moderate + 1 │ 0 male 2 own NA little ⋯ + 2 │ 1 female 2 own little moderate + 3 │ 2 male 1 own little NA + 4 │ 3 male 2 free little little + 5 │ 4 male 2 free little little ⋯ + 6 │ 5 male 1 free NA NA + 7 │ 6 male 2 own quite rich NA + 8 │ 7 male 3 rent little moderate + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 994 │ 993 male 3 own little little ⋯ + 995 │ 994 male 2 own NA NA + 996 │ 995 female 1 own little NA + 997 │ 996 male 3 own little little + 998 │ 997 male 2 own little NA ⋯ + 999 │ 998 male 2 free little little + 1000 │ 999 male 2 own moderate moderate 3 columns and 985 rows omitted ``` @@ -1718,26 +1718,26 @@ as we have described above: ```jldoctest dataframe julia> select(german, r"S") 1000×2 DataFrame - Row │ Sex Saving accounts - │ 
String String -──────┼───────────────────────── - 1 │ male NA - 2 │ female little - 3 │ male little - 4 │ male little - 5 │ male little - 6 │ male NA - 7 │ male quite rich - 8 │ male little - ⋮ │ ⋮ ⋮ - 994 │ male little - 995 │ male NA - 996 │ female little - 997 │ male little - 998 │ male little - 999 │ male little - 1000 │ male moderate - 985 rows omitted + Row │ Sex Saving accounts + │ InlineSt… InlineString15… +──────┼──────────────────────────── + 1 │ male NA + 2 │ female little + 3 │ male little + 4 │ male little + 5 │ male little + 6 │ male NA + 7 │ male quite rich + 8 │ male little + ⋮ │ ⋮ ⋮ + 994 │ male little + 995 │ male NA + 996 │ female little + 997 │ male little + 998 │ male little + 999 │ male little + 1000 │ male moderate + 985 rows omitted ``` The benefit of `select` or `combine` over indexing is that it is easier @@ -1746,26 +1746,26 @@ to combine several column selectors, e.g.: ```jldoctest dataframe julia> select(german, r"S", "Job", 1) 1000×4 DataFrame - Row │ Sex Saving accounts Job id - │ String String Int64 Int64 -──────┼─────────────────────────────────────── - 1 │ male NA 2 0 - 2 │ female little 2 1 - 3 │ male little 1 2 - 4 │ male little 2 3 - 5 │ male little 2 4 - 6 │ male NA 1 5 - 7 │ male quite rich 2 6 - 8 │ male little 3 7 - ⋮ │ ⋮ ⋮ ⋮ ⋮ - 994 │ male little 3 993 - 995 │ male NA 2 994 - 996 │ female little 1 995 - 997 │ male little 3 996 - 998 │ male little 2 997 - 999 │ male little 2 998 - 1000 │ male moderate 2 999 - 985 rows omitted + Row │ Sex Saving accounts Job id + │ InlineSt… InlineString15… Int64 Int64 +──────┼────────────────────────────────────────── + 1 │ male NA 2 0 + 2 │ female little 2 1 + 3 │ male little 1 2 + 4 │ male little 2 3 + 5 │ male little 2 4 + 6 │ male NA 1 5 + 7 │ male quite rich 2 6 + 8 │ male little 3 7 + ⋮ │ ⋮ ⋮ ⋮ ⋮ + 994 │ male little 3 993 + 995 │ male NA 2 994 + 996 │ female little 1 995 + 997 │ male little 3 996 + 998 │ male little 2 997 + 999 │ male little 2 998 + 1000 │ male moderate 2 999 + 985 
rows omitted ``` Taking advantage of this flexibility here is an idiomatic pattern to move some column to the front of a data frame: @@ -1773,25 +1773,25 @@ Taking advantage of this flexibility here is an idiomatic pattern to move some c ```jldoctest dataframe julia> select(german, "Sex", :) 1000×10 DataFrame - Row │ Sex id Age Job Housing Saving accounts Checking accoun ⋯ - │ String Int64 Int64 Int64 String String String ⋯ + Row │ Sex id Age Job Housing Saving accounts Checking a ⋯ + │ InlineSt… Int64 Int64 Int64 InlineSt… InlineString15… InlineStri ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1 │ male 0 67 2 own NA little ⋯ - 2 │ female 1 22 2 own little moderate - 3 │ male 2 49 1 own little NA - 4 │ male 3 45 2 free little little - 5 │ male 4 53 2 free little little ⋯ - 6 │ male 5 35 1 free NA NA - 7 │ male 6 53 2 own quite rich NA - 8 │ male 7 35 3 rent little moderate - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 994 │ male 993 30 3 own little little ⋯ - 995 │ male 994 50 2 own NA NA - 996 │ female 995 31 1 own little NA - 997 │ male 996 40 3 own little little - 998 │ male 997 38 2 own little NA ⋯ - 999 │ male 998 23 2 free little little - 1000 │ male 999 27 2 own moderate moderate + 1 │ male 0 67 2 own NA little ⋯ + 2 │ female 1 22 2 own little moderate + 3 │ male 2 49 1 own little NA + 4 │ male 3 45 2 free little little + 5 │ male 4 53 2 free little little ⋯ + 6 │ male 5 35 1 free NA NA + 7 │ male 6 53 2 own quite rich NA + 8 │ male 7 35 3 rent little moderate + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 994 │ male 993 30 3 own little little ⋯ + 995 │ male 994 50 2 own NA NA + 996 │ female 995 31 1 own little NA + 997 │ male 996 40 3 own little little + 998 │ male 997 38 2 own little NA ⋯ + 999 │ male 998 23 2 free little little + 1000 │ male 999 27 2 own moderate moderate 4 columns and 985 rows omitted ``` @@ -1801,26 +1801,26 @@ Below, we are simply passing source column and target column name to rename them ```jldoctest dataframe julia> select(german, :Sex 
=> :x1, :Age => :x2) 1000×2 DataFrame - Row │ x1 x2 - │ String Int64 -──────┼─────────────── - 1 │ male 67 - 2 │ female 22 - 3 │ male 49 - 4 │ male 45 - 5 │ male 53 - 6 │ male 35 - 7 │ male 53 - 8 │ male 35 - ⋮ │ ⋮ ⋮ - 994 │ male 30 - 995 │ male 50 - 996 │ female 31 - 997 │ male 40 - 998 │ male 38 - 999 │ male 23 - 1000 │ male 27 - 985 rows omitted + Row │ x1 x2 + │ InlineSt… Int64 +──────┼────────────────── + 1 │ male 67 + 2 │ female 22 + 3 │ male 49 + 4 │ male 45 + 5 │ male 53 + 6 │ male 35 + 7 │ male 53 + 8 │ male 35 + ⋮ │ ⋮ ⋮ + 994 │ male 30 + 995 │ male 50 + 996 │ female 31 + 997 │ male 40 + 998 │ male 38 + 999 │ male 23 + 1000 │ male 27 + 985 rows omitted ``` It is important to note that `select` always returns a data frame, even if a single column selected @@ -1881,8 +1881,8 @@ the `copycols=false` keyword argument: julia> df = select(german, :Sex) 1000×1 DataFrame Row │ Sex - │ String -──────┼──────── + │ InlineSt… +──────┼─────────── 1 │ male 2 │ female 3 │ male @@ -1891,7 +1891,7 @@ julia> df = select(german, :Sex) 6 │ male 7 │ male 8 │ male - ⋮ │ ⋮ + ⋮ │ ⋮ 994 │ male 995 │ male 996 │ female @@ -1899,7 +1899,7 @@ julia> df = select(german, :Sex) 998 │ male 999 │ male 1000 │ male -985 rows omitted + 985 rows omitted julia> df.Sex === german.Sex # copy false @@ -1907,8 +1907,8 @@ false julia> df = select(german, :Sex, copycols=false) 1000×1 DataFrame Row │ Sex - │ String -──────┼──────── + │ InlineSt… +──────┼─────────── 1 │ male 2 │ female 3 │ male @@ -1917,7 +1917,7 @@ julia> df = select(german, :Sex, copycols=false) 6 │ male 7 │ male 8 │ male - ⋮ │ ⋮ + ⋮ │ ⋮ 994 │ male 995 │ male 996 │ female @@ -1925,7 +1925,7 @@ julia> df = select(german, :Sex, copycols=false) 998 │ male 999 │ male 1000 │ male -985 rows omitted + 985 rows omitted julia> df.Sex === german.Sex # no-copy is performed true @@ -1938,25 +1938,25 @@ julia> select!(german, Not(:Age)); julia> german 1000×9 DataFrame - Row │ id Sex Job Housing Saving accounts Checking account Cred ⋯ - │ Int64 
String Int64 String String String Int6 ⋯ + Row │ id Sex Job Housing Saving accounts Checking account ⋯ + │ Int64 InlineSt… Int64 InlineSt… InlineString15… InlineString15… ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1 │ 0 male 2 own NA little ⋯ - 2 │ 1 female 2 own little moderate - 3 │ 2 male 1 own little NA - 4 │ 3 male 2 free little little - 5 │ 4 male 2 free little little ⋯ - 6 │ 5 male 1 free NA NA - 7 │ 6 male 2 own quite rich NA - 8 │ 7 male 3 rent little moderate - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 994 │ 993 male 3 own little little ⋯ - 995 │ 994 male 2 own NA NA - 996 │ 995 female 1 own little NA - 997 │ 996 male 3 own little little - 998 │ 997 male 2 own little NA ⋯ - 999 │ 998 male 2 free little little - 1000 │ 999 male 2 own moderate moderate + 1 │ 0 male 2 own NA little ⋯ + 2 │ 1 female 2 own little moderate + 3 │ 2 male 1 own little NA + 4 │ 3 male 2 free little little + 5 │ 4 male 2 free little little ⋯ + 6 │ 5 male 1 free NA NA + 7 │ 6 male 2 own quite rich NA + 8 │ 7 male 3 rent little moderate + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 994 │ 993 male 3 own little little ⋯ + 995 │ 994 male 2 own NA NA + 996 │ 995 female 1 own little NA + 997 │ 996 male 3 own little little + 998 │ 997 male 2 own little NA ⋯ + 999 │ 998 male 2 free little little + 1000 │ 999 male 2 own moderate moderate 3 columns and 985 rows omitted ``` @@ -1970,31 +1970,31 @@ julia> german = copy(german_ref); julia> df = german_ref[1:8, 1:5] 8×5 DataFrame - Row │ id Age Sex Job Housing - │ Int64 Int64 String Int64 String -─────┼────────────────────────────────────── - 1 │ 0 67 male 2 own - 2 │ 1 22 female 2 own - 3 │ 2 49 male 1 own - 4 │ 3 45 male 2 free - 5 │ 4 53 male 2 free - 6 │ 5 35 male 1 free - 7 │ 6 53 male 2 own - 8 │ 7 35 male 3 rent + Row │ id Age Sex Job Housing + │ Int64 Int64 InlineSt… Int64 InlineSt… +─────┼─────────────────────────────────────────── + 1 │ 0 67 male 2 own + 2 │ 1 22 female 2 own + 3 │ 2 49 male 1 own + 4 │ 3 45 male 2 free + 5 │ 4 53 male 2 
free + 6 │ 5 35 male 1 free + 7 │ 6 53 male 2 own + 8 │ 7 35 male 3 rent julia> transform(df, :Age => maximum) 8×6 DataFrame - Row │ id Age Sex Job Housing Age_maximum - │ Int64 Int64 String Int64 String Int64 -─────┼─────────────────────────────────────────────────── - 1 │ 0 67 male 2 own 67 - 2 │ 1 22 female 2 own 67 - 3 │ 2 49 male 1 own 67 - 4 │ 3 45 male 2 free 67 - 5 │ 4 53 male 2 free 67 - 6 │ 5 35 male 1 free 67 - 7 │ 6 53 male 2 own 67 - 8 │ 7 35 male 3 rent 67 + Row │ id Age Sex Job Housing Age_maximum + │ Int64 Int64 InlineSt… Int64 InlineSt… Int64 +─────┼──────────────────────────────────────────────────────── + 1 │ 0 67 male 2 own 67 + 2 │ 1 22 female 2 own 67 + 3 │ 2 49 male 1 own 67 + 4 │ 3 45 male 2 free 67 + 5 │ 4 53 male 2 free 67 + 6 │ 5 35 male 1 free 67 + 7 │ 6 53 male 2 own 67 + 8 │ 7 35 male 3 rent 67 ``` In the example below we are swapping values stored in columns `:Sex` and `:Age`: @@ -2002,25 +2002,25 @@ In the example below we are swapping values stored in columns `:Sex` and `:Age`: ```jldoctest dataframe julia> transform(german, :Age => :Sex, :Sex => :Age) 1000×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking accoun ⋯ - │ Int64 String Int64 Int64 String String String ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking a ⋯ + │ Int64 InlineSt… Int64 Int64 InlineSt… InlineString15… InlineStri ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1 │ 0 male 67 2 own NA little ⋯ - 2 │ 1 female 22 2 own little moderate - 3 │ 2 male 49 1 own little NA - 4 │ 3 male 45 2 free little little - 5 │ 4 male 53 2 free little little ⋯ - 6 │ 5 male 35 1 free NA NA - 7 │ 6 male 53 2 own quite rich NA - 8 │ 7 male 35 3 rent little moderate - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 994 │ 993 male 30 3 own little little ⋯ - 995 │ 994 male 50 2 own NA NA - 996 │ 995 female 31 1 own little NA - 997 │ 996 male 40 3 own little little - 998 │ 997 male 38 2 own little NA ⋯ - 999 │ 998 male 23 2 free little little - 1000 │ 
999 male 27 2 own moderate moderate + 1 │ 0 male 67 2 own NA little ⋯ + 2 │ 1 female 22 2 own little moderate + 3 │ 2 male 49 1 own little NA + 4 │ 3 male 45 2 free little little + 5 │ 4 male 53 2 free little little ⋯ + 6 │ 5 male 35 1 free NA NA + 7 │ 6 male 53 2 own quite rich NA + 8 │ 7 male 35 3 rent little moderate + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 994 │ 993 male 30 3 own little little ⋯ + 995 │ 994 male 50 2 own NA NA + 996 │ 995 female 31 1 own little NA + 997 │ 996 male 40 3 own little little + 998 │ 997 male 38 2 own little NA ⋯ + 999 │ 998 male 23 2 free little little + 1000 │ 999 male 27 2 own moderate moderate 4 columns and 985 rows omitted ``` diff --git a/docs/src/man/reshaping_and_pivoting.md b/docs/src/man/reshaping_and_pivoting.md index d4ea63d200..2122cfa67e 100755 --- a/docs/src/man/reshaping_and_pivoting.md +++ b/docs/src/man/reshaping_and_pivoting.md @@ -10,7 +10,7 @@ julia> iris = CSV.read((joinpath(dirname(pathof(DataFrames)), DataFrame) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 String + │ Float64 Float64 Float64 Float64 InlineSt… ─────┼────────────────────────────────────────────────────────────────── 1 │ 5.1 3.5 1.4 0.2 Iris-setosa 2 │ 4.9 3.0 1.4 0.2 Iris-setosa @@ -33,7 +33,7 @@ julia> iris = CSV.read((joinpath(dirname(pathof(DataFrames)), julia> stack(iris, 1:4) 600×3 DataFrame Row │ Species variable value - │ String String Float64 + │ InlineSt… String Float64 ─────┼────────────────────────────────────── 1 │ Iris-setosa SepalLength 5.1 2 │ Iris-setosa SepalLength 4.9 @@ -62,7 +62,7 @@ be given: julia> stack(iris, [:SepalLength, :SepalWidth, :PetalLength, :PetalWidth]) 600×3 DataFrame Row │ Species variable value - │ String String Float64 + │ InlineSt… String Float64 ─────┼────────────────────────────────────── 1 │ Iris-setosa SepalLength 5.1 2 │ Iris-setosa SepalLength 4.9 @@ -100,7 +100,7 @@ the long format: julia> stack(iris, [:SepalLength, :SepalWidth], :Species) 300×3 
DataFrame Row │ Species variable value - │ String String Float64 + │ InlineSt… String Float64 ─────┼────────────────────────────────────── 1 │ Iris-setosa SepalLength 5.1 2 │ Iris-setosa SepalLength 4.9 @@ -127,7 +127,7 @@ If you prefer to specify the id columns then use `Not` with `stack` like this: julia> stack(iris, Not(:Species)) 600×3 DataFrame Row │ Species variable value - │ String String Float64 + │ InlineSt… String Float64 ─────┼────────────────────────────────────── 1 │ Iris-setosa SepalLength 5.1 2 │ Iris-setosa SepalLength 4.9 @@ -159,7 +159,7 @@ julia> iris.id = 1:size(iris, 1) julia> longdf = stack(iris, Not([:Species, :id])) 600×4 DataFrame Row │ Species id variable value - │ String Int64 String Float64 + │ InlineSt… Int64 String Float64 ─────┼───────────────────────────────────────────── 1 │ Iris-setosa 1 SepalLength 5.1 2 │ Iris-setosa 2 SepalLength 4.9 @@ -209,7 +209,7 @@ If the remaining columns are unique, you can skip the id variable and use: julia> unstack(longdf, :variable, :value) 150×6 DataFrame Row │ Species id SepalLength SepalWidth PetalLength PetalWidth ⋯ - │ String Int64 Float64? Float64? Float64? Float64? ⋯ + │ InlineSt… Int64 Float64? Float64? Float64? Float64? ⋯ ─────┼────────────────────────────────────────────────────────────────────────── 1 │ Iris-setosa 1 5.1 3.5 1.4 0.2 ⋯ 2 │ Iris-setosa 2 4.9 3.0 1.4 0.2 @@ -236,7 +236,7 @@ arguments, as they will be used by default, and write: julia> unstack(longdf) 150×6 DataFrame Row │ Species id SepalLength SepalWidth PetalLength PetalWidth ⋯ - │ String Int64 Float64? Float64? Float64? Float64? ⋯ + │ InlineSt… Int64 Float64? Float64? Float64? Float64? ⋯ ─────┼────────────────────────────────────────────────────────────────────────── 1 │ Iris-setosa 1 5.1 3.5 1.4 0.2 ⋯ 2 │ Iris-setosa 2 4.9 3.0 1.4 0.2 @@ -264,7 +264,7 @@ the original wide data frame. 
Here is an example: julia> stack(iris, view=true) 600×4 DataFrame Row │ Species id variable value - │ String Int64 String Float64 + │ InlineSt… Int64 String Float64 ─────┼───────────────────────────────────────────── 1 │ Iris-setosa 1 SepalLength 5.1 2 │ Iris-setosa 2 SepalLength 4.9 @@ -306,7 +306,7 @@ julia> using Statistics julia> d = stack(iris, Not(:Species)) 750×3 DataFrame Row │ Species variable value - │ String String Float64 + │ InlineSt… String Float64 ─────┼────────────────────────────────────── 1 │ Iris-setosa SepalLength 5.1 2 │ Iris-setosa SepalLength 4.9 @@ -329,7 +329,7 @@ julia> d = stack(iris, Not(:Species)) julia> x = combine(groupby(d, [:variable, :Species]), :value => mean => :vsum) 15×3 DataFrame Row │ variable Species vsum - │ String String Float64 + │ String InlineSt… Float64 ─────┼─────────────────────────────────────── 1 │ SepalLength Iris-setosa 5.006 2 │ SepalLength Iris-versicolor 5.936 diff --git a/docs/src/man/sorting.md b/docs/src/man/sorting.md index 762ef205eb..d90e520bc1 100644 --- a/docs/src/man/sorting.md +++ b/docs/src/man/sorting.md @@ -10,7 +10,7 @@ julia> iris = CSV.read((joinpath(dirname(pathof(DataFrames)), DataFrame) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 String + │ Float64 Float64 Float64 Float64 InlineSt… ─────┼────────────────────────────────────────────────────────────────── 1 │ 5.1 3.5 1.4 0.2 Iris-setosa 2 │ 4.9 3.0 1.4 0.2 Iris-setosa @@ -33,7 +33,7 @@ julia> iris = CSV.read((joinpath(dirname(pathof(DataFrames)), julia> sort!(iris) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 String + │ Float64 Float64 Float64 Float64 InlineSt… ─────┼────────────────────────────────────────────────────────────────── 1 │ 4.3 3.0 1.1 0.1 Iris-setosa 2 │ 4.4 2.9 1.4 0.2 Iris-setosa @@ -65,7 +65,7 @@ Here are some examples showing most of the possible options: julia> sort!(iris, rev = true) 150×5 
DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 String + │ Float64 Float64 Float64 Float64 InlineSt… ─────┼────────────────────────────────────────────────────────────────── 1 │ 7.9 3.8 6.4 2.0 Iris-virginica 2 │ 7.7 3.8 6.7 2.2 Iris-virginica @@ -88,7 +88,7 @@ julia> sort!(iris, rev = true) julia> sort!(iris, [:Species, :SepalWidth]) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 String + │ Float64 Float64 Float64 Float64 InlineSt… ─────┼────────────────────────────────────────────────────────────────── 1 │ 4.5 2.3 1.3 0.3 Iris-setosa 2 │ 4.4 2.9 1.4 0.2 Iris-setosa @@ -111,7 +111,7 @@ julia> sort!(iris, [:Species, :SepalWidth]) julia> sort!(iris, [order(:Species, by=length), order(:SepalLength, rev=true)]) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 String + │ Float64 Float64 Float64 Float64 InlineSt… ─────┼─────────────────────────────────────────────────────────────────── 1 │ 5.8 4.0 1.2 0.2 Iris-setosa 2 │ 5.7 3.8 1.7 0.3 Iris-setosa @@ -148,7 +148,7 @@ rows will be sorted by increasing `:PetalLength`: julia> sort!(iris, [:Species, :PetalLength], rev=(true, false)) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 String + │ Float64 Float64 Float64 Float64 InlineSt… ─────┼────────────────────────────────────────────────────────────────── 1 │ 4.9 2.5 4.5 1.7 Iris-virginica 2 │ 6.2 2.8 4.8 1.8 Iris-virginica @@ -171,7 +171,7 @@ julia> sort!(iris, [:Species, :PetalLength], rev=(true, false)) julia> sort!(iris, [order(:Species, rev=true), :PetalLength]) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 String + │ Float64 Float64 Float64 Float64 InlineSt… ─────┼────────────────────────────────────────────────────────────────── 1 │ 4.9 2.5 4.5 1.7 
Iris-virginica 2 │ 6.2 2.8 4.8 1.8 Iris-virginica diff --git a/docs/src/man/split_apply_combine.md b/docs/src/man/split_apply_combine.md index cd003397ed..ab9be0d0ac 100644 --- a/docs/src/man/split_apply_combine.md +++ b/docs/src/man/split_apply_combine.md @@ -166,7 +166,7 @@ julia> iris = CSV.read((joinpath(dirname(pathof(DataFrames)), DataFrame) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 String + │ Float64 Float64 Float64 Float64 InlineSt… ─────┼────────────────────────────────────────────────────────────────── 1 │ 5.1 3.5 1.4 0.2 Iris-setosa 2 │ 4.9 3.0 1.4 0.2 Iris-setosa @@ -190,7 +190,7 @@ julia> gdf = groupby(iris, :Species) GroupedDataFrame with 3 groups based on key: Species First Group (50 rows): Species = "Iris-setosa" Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 String + │ Float64 Float64 Float64 Float64 InlineSt… ─────┼─────────────────────────────────────────────────────────────── 1 │ 5.1 3.5 1.4 0.2 Iris-setosa 2 │ 4.9 3.0 1.4 0.2 Iris-setosa @@ -213,7 +213,7 @@ First Group (50 rows): Species = "Iris-setosa" ⋮ Last Group (50 rows): Species = "Iris-virginica" Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 String + │ Float64 Float64 Float64 Float64 InlineSt… ─────┼────────────────────────────────────────────────────────────────── 1 │ 6.3 3.3 6.0 2.5 Iris-virginica 2 │ 5.8 2.7 5.1 1.9 Iris-virginica @@ -237,7 +237,7 @@ Last Group (50 rows): Species = "Iris-virginica" julia> combine(gdf, :PetalLength => mean) 3×2 DataFrame Row │ Species PetalLength_mean - │ String Float64 + │ InlineSt… Float64 ─────┼─────────────────────────────────── 1 │ Iris-setosa 1.464 2 │ Iris-versicolor 4.26 @@ -246,7 +246,7 @@ julia> combine(gdf, :PetalLength => mean) julia> combine(gdf, nrow) 3×2 DataFrame Row │ Species nrow - │ String Int64 + │ InlineSt… Int64 ─────┼──────────────────────── 1 │ Iris-setosa 50 2 │ 
Iris-versicolor 50 @@ -255,7 +255,7 @@ julia> combine(gdf, nrow) julia> combine(gdf, nrow, :PetalLength => mean => :mean) 3×3 DataFrame Row │ Species nrow mean - │ String Int64 Float64 + │ InlineSt… Int64 Float64 ─────┼───────────────────────────────── 1 │ Iris-setosa 50 1.464 2 │ Iris-versicolor 50 4.26 @@ -265,7 +265,7 @@ julia> combine(gdf, [:PetalLength, :SepalLength] => ((p, s) -> (a=mean(p)/mean(s AsTable) # multiple columns are passed as arguments 3×3 DataFrame Row │ Species a b - │ String Float64 Float64 + │ InlineSt… Float64 Float64 ─────┼──────────────────────────────────── 1 │ Iris-setosa 0.292449 73.2 2 │ Iris-versicolor 0.717655 213.0 @@ -276,7 +276,7 @@ julia> combine(gdf, x -> std(x.PetalLength) / std(x.SepalLength)) # passing a NamedTuple 3×2 DataFrame Row │ Species PetalLength_SepalLength_function - │ String Float64 + │ InlineSt… Float64 ─────┼─────────────────────────────────────────────────── 1 │ Iris-setosa 0.492245 2 │ Iris-versicolor 0.910378 @@ -285,7 +285,7 @@ julia> combine(gdf, julia> combine(x -> std(x.PetalLength) / std(x.SepalLength), gdf) # passing a SubDataFrame 3×2 DataFrame Row │ Species x1 - │ String Float64 + │ InlineSt… Float64 ─────┼─────────────────────────── 1 │ Iris-setosa 0.492245 2 │ Iris-versicolor 0.910378 @@ -294,7 +294,7 @@ julia> combine(x -> std(x.PetalLength) / std(x.SepalLength), gdf) # passing a Su julia> combine(gdf, 1:2 => cor, nrow) 3×3 DataFrame Row │ Species SepalLength_SepalWidth_cor nrow - │ String Float64 Int64 + │ InlineSt… Float64 Int64 ─────┼──────────────────────────────────────────────────── 1 │ Iris-setosa 0.74678 50 2 │ Iris-versicolor 0.525911 50 @@ -303,7 +303,7 @@ julia> combine(gdf, 1:2 => cor, nrow) julia> combine(gdf, :PetalLength => (x -> [extrema(x)]) => [:min, :max]) 3×3 DataFrame Row │ Species min max - │ String Float64 Float64 + │ InlineSt… Float64 Float64 ─────┼─────────────────────────────────── 1 │ Iris-setosa 1.0 1.9 2 │ Iris-versicolor 3.0 5.1 @@ -356,7 +356,7 @@ julia> combine(gdf) 
do df end 3×3 DataFrame Row │ Species m s² - │ String Float64 Float64 + │ InlineSt… Float64 Float64 ─────┼───────────────────────────────────── 1 │ Iris-setosa 1.464 0.0301061 2 │ Iris-versicolor 4.26 0.220816 @@ -478,7 +478,7 @@ julia> gd = groupby(iris, :Species) GroupedDataFrame with 3 groups based on key: Species First Group (50 rows): Species = "Iris-setosa" Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 String + │ Float64 Float64 Float64 Float64 InlineSt… ─────┼─────────────────────────────────────────────────────────────── 1 │ 5.1 3.5 1.4 0.2 Iris-setosa 2 │ 4.9 3.0 1.4 0.2 Iris-setosa @@ -501,7 +501,7 @@ First Group (50 rows): Species = "Iris-setosa" ⋮ Last Group (50 rows): Species = "Iris-virginica" Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 String + │ Float64 Float64 Float64 Float64 InlineSt… ─────┼────────────────────────────────────────────────────────────────── 1 │ 6.3 3.3 6.0 2.5 Iris-virginica 2 │ 5.8 2.7 5.1 1.9 Iris-virginica @@ -525,7 +525,7 @@ Last Group (50 rows): Species = "Iris-virginica" julia> combine(gd, valuecols(gd) .=> mean) 3×5 DataFrame Row │ Species SepalLength_mean SepalWidth_mean PetalLength_mean P ⋯ - │ String Float64 Float64 Float64 F ⋯ + │ InlineSt… Float64 Float64 Float64 F ⋯ ─────┼────────────────────────────────────────────────────────────────────────── 1 │ Iris-setosa 5.006 3.418 1.464 ⋯ 2 │ Iris-versicolor 5.936 2.77 4.26 From 10efa01ce56e940eb6f8e4a63cfc9af00882a3f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 9 Sep 2021 00:33:44 +0200 Subject: [PATCH 2/9] update for CSV.jl 0.9.1 --- docs/src/man/basics.md | 786 +++++++++++++------------ docs/src/man/querying_frameworks.md | 4 +- docs/src/man/reshaping_and_pivoting.md | 22 +- docs/src/man/sorting.md | 14 +- docs/src/man/split_apply_combine.md | 30 +- 5 files changed, 432 insertions(+), 424 deletions(-) diff --git 
a/docs/src/man/basics.md b/docs/src/man/basics.md index 59fd54c419..f88ec1fe6c 100644 --- a/docs/src/man/basics.md +++ b/docs/src/man/basics.md @@ -226,25 +226,25 @@ julia> german_ref = CSV.read(joinpath(dirname(pathof(DataFrames)), "..", "docs", "src", "assets", "german.csv"), DataFrame) 1000×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking a ⋯ - │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStri ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking accou ⋯ + │ Int64 Int64 String7 Int64 String7 String15 String15 ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own NA little ⋯ - 2 │ 1 22 female 2 own little moderate - 3 │ 2 49 male 1 own little NA - 4 │ 3 45 male 2 free little little - 5 │ 4 53 male 2 free little little ⋯ - 6 │ 5 35 male 1 free NA NA - 7 │ 6 53 male 2 own quite rich NA - 8 │ 7 35 male 3 rent little moderate - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 994 │ 993 30 male 3 own little little ⋯ - 995 │ 994 50 male 2 own NA NA - 996 │ 995 31 female 1 own little NA - 997 │ 996 40 male 3 own little little - 998 │ 997 38 male 2 own little NA ⋯ - 999 │ 998 23 male 2 free little little - 1000 │ 999 27 male 2 own moderate moderate + 1 │ 0 67 male 2 own NA little ⋯ + 2 │ 1 22 female 2 own little moderate + 3 │ 2 49 male 1 own little NA + 4 │ 3 45 male 2 free little little + 5 │ 4 53 male 2 free little little ⋯ + 6 │ 5 35 male 1 free NA NA + 7 │ 6 53 male 2 own quite rich NA + 8 │ 7 35 male 3 rent little moderate + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 994 │ 993 30 male 3 own little little ⋯ + 995 │ 994 50 male 2 own NA NA + 996 │ 995 31 female 1 own little NA + 997 │ 996 40 male 3 own little little + 998 │ 997 38 male 2 own little NA ⋯ + 999 │ 998 23 male 2 free little little + 1000 │ 999 27 male 2 own moderate moderate 4 columns and 985 rows omitted ``` @@ -254,7 +254,15 @@ Later in the tutorial we will discuss how to force Julia to show the whole data frame if we wanted so. 
Also observe that DataFrames.jl displays the data type of the column -below its name. In our case, it is an `Int64`, or `String`. +below its name. In our case, it is `Int64`, `String7`, or `String15`. + +Let us mention here the difference between the standard `String` type in Julia +and e.g. the `String7` or `String15` types. The types with a number suffix denote +strings that have a fixed width (similar to the `CHAR(N)` type provided by many +databases). Such strings are much faster to work with (especially if you have many +of them) than the standard `String` type because their instances are not heap +allocated. For this reason `CSV.read` by default reads in narrow string columns +using these fixed-width types. Let us now explain in detail the following code block: ```julia @@ -295,7 +303,7 @@ as in the case of the syntax using a `.`. ```jldoctest dataframe julia> german.Sex -1000-element PooledArrays.PooledVector{WeakRefStrings.InlineString7, UInt32, Vector{UInt32}}: +1000-element PooledArrays.PooledVector{String7, UInt32, Vector{UInt32}}: "male" "female" "male" @@ -321,7 +329,7 @@ julia> colname = "Sex" "Sex" julia> german[!, colname] -1000-element PooledArrays.PooledVector{WeakRefStrings.InlineString7, UInt32, Vector{UInt32}}: +1000-element PooledArrays.PooledVector{String7, UInt32, Vector{UInt32}}: "male" "female" "male" @@ -424,14 +432,14 @@ julia> eltype.(eachcol(german)) 10-element Vector{DataType}: Int64 Int64 - WeakRefStrings.InlineString7 + String7 Int64 - WeakRefStrings.InlineString7 - WeakRefStrings.InlineString15 - WeakRefStrings.InlineString15 + String7 + String15 + String15 Int64 Int64 - WeakRefStrings.InlineString31 + String31 ``` !!! 
note @@ -455,25 +463,25 @@ julia> empty(german) julia> german 1000×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking a ⋯ - │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStri ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking accou ⋯ + │ Int64 Int64 String7 Int64 String7 String15 String15 ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own NA little ⋯ - 2 │ 1 22 female 2 own little moderate - 3 │ 2 49 male 1 own little NA - 4 │ 3 45 male 2 free little little - 5 │ 4 53 male 2 free little little ⋯ - 6 │ 5 35 male 1 free NA NA - 7 │ 6 53 male 2 own quite rich NA - 8 │ 7 35 male 3 rent little moderate - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 994 │ 993 30 male 3 own little little ⋯ - 995 │ 994 50 male 2 own NA NA - 996 │ 995 31 female 1 own little NA - 997 │ 996 40 male 3 own little little - 998 │ 997 38 male 2 own little NA ⋯ - 999 │ 998 23 male 2 free little little - 1000 │ 999 27 male 2 own moderate moderate + 1 │ 0 67 male 2 own NA little ⋯ + 2 │ 1 22 female 2 own little moderate + 3 │ 2 49 male 1 own little NA + 4 │ 3 45 male 2 free little little + 5 │ 4 53 male 2 free little little ⋯ + 6 │ 5 35 male 1 free NA NA + 7 │ 6 53 male 2 own quite rich NA + 8 │ 7 35 male 3 rent little moderate + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 994 │ 993 30 male 3 own little little ⋯ + 995 │ 994 50 male 2 own NA NA + 996 │ 995 31 female 1 own little NA + 997 │ 996 40 male 3 own little little + 998 │ 997 38 male 2 own little NA ⋯ + 999 │ 998 23 male 2 free little little + 1000 │ 999 27 male 2 own moderate moderate 4 columns and 985 rows omitted julia> empty!(german) @@ -552,10 +560,10 @@ julia> describe(german, cols=1:3) 3×7 DataFrame Row │ variable mean min median max nmissing eltype │ Symbol Union… Any Union… Any Int64 DataType -─────┼───────────────────────────────────────────────────────────────── +─────┼──────────────────────────────────────────────────────────── 1 │ id 499.5 0 499.5 999 0 Int64 2 │ Age 35.546 19 33.0 
75 0 Int64 - 3 │ Sex female male 0 InlineString7 + 3 │ Sex female male 0 String7 ``` The default statistics reported are mean, min, median, max, number of missing values, and element type of @@ -568,26 +576,26 @@ You can adjust how data frame is displayed by calling the `show` function manual ```jldoctest dataframe julia> show(german, allcols=true) 1000×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking account Credit amount Duration Purpose - │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineString15… Int64 Int64 InlineSt… -──────┼──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own NA little 1169 6 radio/TV - 2 │ 1 22 female 2 own little moderate 5951 48 radio/TV - 3 │ 2 49 male 1 own little NA 2096 12 education - 4 │ 3 45 male 2 free little little 7882 42 furniture/equipment - 5 │ 4 53 male 2 free little little 4870 24 car - 6 │ 5 35 male 1 free NA NA 9055 36 education - 7 │ 6 53 male 2 own quite rich NA 2835 24 furniture/equipment - 8 │ 7 35 male 3 rent little moderate 6948 36 car - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ - 994 │ 993 30 male 3 own little little 3959 36 furniture/equipment - 995 │ 994 50 male 2 own NA NA 2390 12 car - 996 │ 995 31 female 1 own little NA 1736 12 furniture/equipment - 997 │ 996 40 male 3 own little little 3857 30 car - 998 │ 997 38 male 2 own little NA 804 12 radio/TV - 999 │ 998 23 male 2 free little little 1845 45 radio/TV - 1000 │ 999 27 male 2 own moderate moderate 4576 45 car - 985 rows omitted + Row │ id Age Sex Job Housing Saving accounts Checking account Credit amount Duration Purpose + │ Int64 Int64 String7 Int64 String7 String15 String15 Int64 Int64 String31 +──────┼──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + 1 │ 0 67 male 2 own NA little 1169 6 radio/TV + 2 │ 1 22 female 2 own little moderate 5951 48 radio/TV + 3 │ 2 49 male 1 own 
little NA 2096 12 education + 4 │ 3 45 male 2 free little little 7882 42 furniture/equipment + 5 │ 4 53 male 2 free little little 4870 24 car + 6 │ 5 35 male 1 free NA NA 9055 36 education + 7 │ 6 53 male 2 own quite rich NA 2835 24 furniture/equipment + 8 │ 7 35 male 3 rent little moderate 6948 36 car + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ + 994 │ 993 30 male 3 own little little 3959 36 furniture/equipment + 995 │ 994 50 male 2 own NA NA 2390 12 car + 996 │ 995 31 female 1 own little NA 1736 12 furniture/equipment + 997 │ 996 40 male 3 own little little 3857 30 car + 998 │ 997 38 male 2 own little NA 804 12 radio/TV + 999 │ 998 23 male 2 free little little 1845 45 radio/TV + 1000 │ 999 27 male 2 own moderate moderate 4576 45 car + 985 rows omitted ``` It is easy to compute descriptive statistics directly on individual columns using @@ -637,27 +645,27 @@ using the `first` and `last` functions respectively: ```jldoctest dataframe julia> first(german, 6) 6×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking ac ⋯ - │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStrin ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking accoun ⋯ + │ Int64 Int64 String7 Int64 String7 String15 String15 ⋯ ─────┼────────────────────────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own NA little ⋯ - 2 │ 1 22 female 2 own little moderate - 3 │ 2 49 male 1 own little NA - 4 │ 3 45 male 2 free little little - 5 │ 4 53 male 2 free little little ⋯ - 6 │ 5 35 male 1 free NA NA + 1 │ 0 67 male 2 own NA little ⋯ + 2 │ 1 22 female 2 own little moderate + 3 │ 2 49 male 1 own little NA + 4 │ 3 45 male 2 free little little + 5 │ 4 53 male 2 free little little ⋯ + 6 │ 5 35 male 1 free NA NA 4 columns omitted julia> last(german, 5) 5×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking ac ⋯ - │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStrin ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking accoun ⋯ + │ Int64 Int64 
String7 Int64 String7 String15 String15 ⋯ ─────┼────────────────────────────────────────────────────────────────────────── - 1 │ 995 31 female 1 own little NA ⋯ - 2 │ 996 40 male 3 own little little - 3 │ 997 38 male 2 own little NA - 4 │ 998 23 male 2 free little little - 5 │ 999 27 male 2 own moderate moderate ⋯ + 1 │ 995 31 female 1 own little NA ⋯ + 2 │ 996 40 male 3 own little little + 3 │ 997 38 male 2 own little NA + 4 │ 998 23 male 2 free little little + 5 │ 999 27 male 2 own moderate moderate ⋯ 4 columns omitted ``` @@ -671,18 +679,18 @@ i.e. allows to update the source data frame, which is often useful. ```jldoctest dataframe julia> first(german) DataFrameRow - Row │ id Age Sex Job Housing Saving accounts Checking ac ⋯ - │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStrin ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking accoun ⋯ + │ Int64 Int64 String7 Int64 String7 String15 String15 ⋯ ─────┼────────────────────────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own NA little ⋯ + 1 │ 0 67 male 2 own NA little ⋯ 4 columns omitted julia> last(german) DataFrameRow - Row │ id Age Sex Job Housing Saving accounts Checking a ⋯ - │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStri ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking accou ⋯ + │ Int64 Int64 String7 Int64 String7 String15 String15 ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1000 │ 999 27 male 2 own moderate moderate ⋯ + 1000 │ 999 27 male 2 own moderate moderate ⋯ 4 columns omitted ``` @@ -702,42 +710,42 @@ Here are a few examples: ```jldoctest dataframe julia> german[1:5, [:Sex, :Age]] 5×2 DataFrame - Row │ Sex Age - │ InlineSt… Int64 -─────┼────────────────── - 1 │ male 67 - 2 │ female 22 - 3 │ male 49 - 4 │ male 45 - 5 │ male 53 + Row │ Sex Age + │ String7 Int64 +─────┼──────────────── + 1 │ male 67 + 2 │ female 22 + 3 │ male 49 + 4 │ male 45 + 5 │ male 53 julia> german[1:5, :] 5×10 
DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking ac ⋯ - │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStrin ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking accoun ⋯ + │ Int64 Int64 String7 Int64 String7 String15 String15 ⋯ ─────┼────────────────────────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own NA little ⋯ - 2 │ 1 22 female 2 own little moderate - 3 │ 2 49 male 1 own little NA - 4 │ 3 45 male 2 free little little - 5 │ 4 53 male 2 free little little ⋯ + 1 │ 0 67 male 2 own NA little ⋯ + 2 │ 1 22 female 2 own little moderate + 3 │ 2 49 male 1 own little NA + 4 │ 3 45 male 2 free little little + 5 │ 4 53 male 2 free little little ⋯ 4 columns omitted julia> german[[1, 6, 15], :] 3×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking ac ⋯ - │ Int64 Int64 InlineSt… Int64 InlineSt… InlineString15… InlineStrin ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking accoun ⋯ + │ Int64 Int64 String7 Int64 String7 String15 String15 ⋯ ─────┼────────────────────────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own NA little ⋯ - 2 │ 5 35 male 1 free NA NA - 3 │ 14 28 female 2 rent little little + 1 │ 0 67 male 2 own NA little ⋯ + 2 │ 5 35 male 1 free NA NA + 3 │ 14 28 female 2 rent little little 4 columns omitted julia> german[:, [:Age, :Sex]] 1000×2 DataFrame Row │ Age Sex - │ Int64 InlineSt… -──────┼────────────────── + │ Int64 String7 +──────┼──────────────── 1 │ 67 male 2 │ 22 female 3 │ 49 male @@ -746,7 +754,7 @@ julia> german[:, [:Age, :Sex]] 6 │ 35 male 7 │ 53 male 8 │ 35 male - ⋮ │ ⋮ ⋮ + ⋮ │ ⋮ ⋮ 994 │ 30 male 995 │ 50 male 996 │ 31 female @@ -754,7 +762,7 @@ julia> german[:, [:Age, :Sex]] 998 │ 38 male 999 │ 23 male 1000 │ 27 male - 985 rows omitted + 985 rows omitted ``` Pay attention that `german[!, [:Sex]]` and `german[:, [:Sex]]` returns a data frame object, @@ -769,8 +777,8 @@ Below we show both operations to highlight this difference: julia> german[!, 
[:Sex]] 1000×1 DataFrame Row │ Sex - │ InlineSt… -──────┼─────────── + │ String7 +──────┼───────── 1 │ male 2 │ female 3 │ male @@ -779,7 +787,7 @@ julia> german[!, [:Sex]] 6 │ male 7 │ male 8 │ male - ⋮ │ ⋮ + ⋮ │ ⋮ 994 │ male 995 │ male 996 │ female @@ -787,10 +795,10 @@ julia> german[!, [:Sex]] 998 │ male 999 │ male 1000 │ male - 985 rows omitted +985 rows omitted julia> german[!, :Sex] -1000-element PooledArrays.PooledVector{WeakRefStrings.InlineString7, UInt32, Vector{UInt32}}: +1000-element PooledArrays.PooledVector{String7, UInt32, Vector{UInt32}}: "male" "female" "male" @@ -848,26 +856,26 @@ efficient than creating a materialized selection. You can create it using a `vie ```jldoctest dataframe julia> view(german, :, 2:5) 1000×4 SubDataFrame - Row │ Age Sex Job Housing - │ Int64 InlineSt… Int64 InlineSt… -──────┼──────────────────────────────────── - 1 │ 67 male 2 own - 2 │ 22 female 2 own - 3 │ 49 male 1 own - 4 │ 45 male 2 free - 5 │ 53 male 2 free - 6 │ 35 male 1 free - 7 │ 53 male 2 own - 8 │ 35 male 3 rent - ⋮ │ ⋮ ⋮ ⋮ ⋮ - 994 │ 30 male 3 own - 995 │ 50 male 2 own - 996 │ 31 female 1 own - 997 │ 40 male 3 own - 998 │ 38 male 2 own - 999 │ 23 male 2 free - 1000 │ 27 male 2 own - 985 rows omitted + Row │ Age Sex Job Housing + │ Int64 String7 Int64 String7 +──────┼──────────────────────────────── + 1 │ 67 male 2 own + 2 │ 22 female 2 own + 3 │ 49 male 1 own + 4 │ 45 male 2 free + 5 │ 53 male 2 free + 6 │ 35 male 1 free + 7 │ 53 male 2 own + 8 │ 35 male 3 rent + ⋮ │ ⋮ ⋮ ⋮ ⋮ + 994 │ 30 male 3 own + 995 │ 50 male 2 own + 996 │ 31 female 1 own + 997 │ 40 male 3 own + 998 │ 38 male 2 own + 999 │ 23 male 2 free + 1000 │ 27 male 2 own + 985 rows omitted ``` or using a `@view` macro: @@ -922,10 +930,10 @@ or a single row: ```jldoctest dataframe julia> @view german[3, 2:5] DataFrameRow - Row │ Age Sex Job Housing - │ Int64 InlineSt… Int64 InlineSt… -─────┼──────────────────────────────────── - 3 │ 49 male 1 own + Row │ Age Sex Job Housing + │ Int64 String7 Int64 
String7 +─────┼──────────────────────────────── + 3 │ 49 male 1 own ``` As you can see the row and column indexing syntax is exactly the same as for indexing. @@ -960,15 +968,15 @@ In order to show how to perform mutating operations on a data frame we make a su ```jldoctest dataframe julia> df1 = german[1:6, 2:4] 6×3 DataFrame - Row │ Age Sex Job - │ Int64 InlineSt… Int64 -─────┼───────────────────────── - 1 │ 67 male 2 - 2 │ 22 female 2 - 3 │ 49 male 1 - 4 │ 45 male 2 - 5 │ 53 male 2 - 6 │ 35 male 1 + Row │ Age Sex Job + │ Int64 String7 Int64 +─────┼─────────────────────── + 1 │ 67 male 2 + 2 │ 22 female 2 + 3 │ 49 male 1 + 4 │ 45 male 2 + 5 │ 53 male 2 + 6 │ 35 male 1 ``` In the following example we replace the column `:Age` in our `df1` data frame @@ -995,15 +1003,15 @@ julia> df1.Age = val julia> df1 6×3 DataFrame - Row │ Age Sex Job - │ Int64 InlineSt… Int64 -─────┼───────────────────────── - 1 │ 80 male 2 - 2 │ 85 female 2 - 3 │ 98 male 1 - 4 │ 95 male 2 - 5 │ 78 male 2 - 6 │ 89 male 1 + Row │ Age Sex Job + │ Int64 String7 Int64 +─────┼─────────────────────── + 1 │ 80 male 2 + 2 │ 85 female 2 + 3 │ 98 male 1 + 4 │ 95 male 2 + 5 │ 78 male 2 + 6 │ 89 male 1 ``` This is a non-copying operation. 
One can perform it only if `val` vector has the same length as number @@ -1027,15 +1035,15 @@ julia> df1[1:3, :Job] = [2, 3, 2] julia> df1 6×3 DataFrame - Row │ Age Sex Job - │ Int64 InlineSt… Int64 -─────┼───────────────────────── - 1 │ 80 male 2 - 2 │ 85 female 3 - 3 │ 98 male 2 - 4 │ 95 male 2 - 5 │ 78 male 2 - 6 │ 89 male 1 + Row │ Age Sex Job + │ Int64 String7 Int64 +─────┼─────────────────────── + 1 │ 80 male 2 + 2 │ 85 female 3 + 3 │ 98 male 2 + 4 │ 95 male 2 + 5 │ 78 male 2 + 6 │ 89 male 1 ``` As a special rule using `!` as row selector replaces column without copying @@ -1395,25 +1403,25 @@ Drop `:Age` column: ```jldoctest dataframe julia> german[:, Not(:Age)] 1000×9 DataFrame - Row │ id Sex Job Housing Saving accounts Checking account ⋯ - │ Int64 InlineSt… Int64 InlineSt… InlineString15… InlineString15… ⋯ + Row │ id Sex Job Housing Saving accounts Checking account Cre ⋯ + │ Int64 String7 Int64 String7 String15 String15 Int ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1 │ 0 male 2 own NA little ⋯ - 2 │ 1 female 2 own little moderate - 3 │ 2 male 1 own little NA - 4 │ 3 male 2 free little little - 5 │ 4 male 2 free little little ⋯ - 6 │ 5 male 1 free NA NA - 7 │ 6 male 2 own quite rich NA - 8 │ 7 male 3 rent little moderate - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 994 │ 993 male 3 own little little ⋯ - 995 │ 994 male 2 own NA NA - 996 │ 995 female 1 own little NA - 997 │ 996 male 3 own little little - 998 │ 997 male 2 own little NA ⋯ - 999 │ 998 male 2 free little little - 1000 │ 999 male 2 own moderate moderate + 1 │ 0 male 2 own NA little ⋯ + 2 │ 1 female 2 own little moderate + 3 │ 2 male 1 own little NA + 4 │ 3 male 2 free little little + 5 │ 4 male 2 free little little ⋯ + 6 │ 5 male 1 free NA NA + 7 │ 6 male 2 own quite rich NA + 8 │ 7 male 3 rent little moderate + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 994 │ 993 male 3 own little little ⋯ + 995 │ 994 male 2 own NA NA + 996 │ 995 female 1 own little NA + 997 │ 996 male 3 own little little + 998 │ 
997 male 2 own little NA ⋯ + 999 │ 998 male 2 free little little + 1000 │ 999 male 2 own moderate moderate 3 columns and 985 rows omitted ``` @@ -1450,26 +1458,26 @@ selectors passed as its arguments: ```jldoctest dataframe julia> german[:, Cols("Age", Between("Sex", "Job"))] 1000×3 DataFrame - Row │ Age Sex Job - │ Int64 InlineSt… Int64 -──────┼───────────────────────── - 1 │ 67 male 2 - 2 │ 22 female 2 - 3 │ 49 male 1 - 4 │ 45 male 2 - 5 │ 53 male 2 - 6 │ 35 male 1 - 7 │ 53 male 2 - 8 │ 35 male 3 - ⋮ │ ⋮ ⋮ ⋮ - 994 │ 30 male 3 - 995 │ 50 male 2 - 996 │ 31 female 1 - 997 │ 40 male 3 - 998 │ 38 male 2 - 999 │ 23 male 2 - 1000 │ 27 male 2 - 985 rows omitted + Row │ Age Sex Job + │ Int64 String7 Int64 +──────┼─────────────────────── + 1 │ 67 male 2 + 2 │ 22 female 2 + 3 │ 49 male 1 + 4 │ 45 male 2 + 5 │ 53 male 2 + 6 │ 35 male 1 + 7 │ 53 male 2 + 8 │ 35 male 3 + ⋮ │ ⋮ ⋮ ⋮ + 994 │ 30 male 3 + 995 │ 50 male 2 + 996 │ 31 female 1 + 997 │ 40 male 3 + 998 │ 38 male 2 + 999 │ 23 male 2 + 1000 │ 27 male 2 + 985 rows omitted ``` You can also use `Regex` (regular expressions) to select columns. 
In the example @@ -1478,26 +1486,26 @@ below we select columns that have `"S"` in their name and also we use `Not` to d ```jldoctest dataframe julia> german[Not(5), r"S"] 999×2 DataFrame - Row │ Sex Saving accounts - │ InlineSt… InlineString15… -─────┼──────────────────────────── - 1 │ male NA - 2 │ female little - 3 │ male little - 4 │ male little - 5 │ male NA - 6 │ male quite rich - 7 │ male little - 8 │ male rich - ⋮ │ ⋮ ⋮ - 993 │ male little - 994 │ male NA - 995 │ female little - 996 │ male little - 997 │ male little - 998 │ male little - 999 │ male moderate - 984 rows omitted + Row │ Sex Saving accounts + │ String7 String15 +─────┼────────────────────────── + 1 │ male NA + 2 │ female little + 3 │ male little + 4 │ male little + 5 │ male NA + 6 │ male quite rich + 7 │ male little + 8 │ male rich + ⋮ │ ⋮ ⋮ + 993 │ male little + 994 │ male NA + 995 │ female little + 996 │ male little + 997 │ male little + 998 │ male little + 999 │ male moderate + 984 rows omitted ``` ## Basic Usage of Transformation Functions @@ -1581,8 +1589,8 @@ exactly like in `select`. 
Here is an example: julia> combine(german, :Age => mean => :mean_age, :Housing => unique => :housing) 3×2 DataFrame Row │ mean_age housing - │ Float64 InlineSt… -─────┼───────────────────── + │ Float64 String7 +─────┼─────────────────── 1 │ 35.546 own 2 │ 35.546 free 3 │ 35.546 rent @@ -1689,25 +1697,25 @@ the resulting data frame: ```jldoctest dataframe julia> select(german, Not(:Age)) 1000×9 DataFrame - Row │ id Sex Job Housing Saving accounts Checking account ⋯ - │ Int64 InlineSt… Int64 InlineSt… InlineString15… InlineString15… ⋯ + Row │ id Sex Job Housing Saving accounts Checking account Cre ⋯ + │ Int64 String7 Int64 String7 String15 String15 Int ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1 │ 0 male 2 own NA little ⋯ - 2 │ 1 female 2 own little moderate - 3 │ 2 male 1 own little NA - 4 │ 3 male 2 free little little - 5 │ 4 male 2 free little little ⋯ - 6 │ 5 male 1 free NA NA - 7 │ 6 male 2 own quite rich NA - 8 │ 7 male 3 rent little moderate - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 994 │ 993 male 3 own little little ⋯ - 995 │ 994 male 2 own NA NA - 996 │ 995 female 1 own little NA - 997 │ 996 male 3 own little little - 998 │ 997 male 2 own little NA ⋯ - 999 │ 998 male 2 free little little - 1000 │ 999 male 2 own moderate moderate + 1 │ 0 male 2 own NA little ⋯ + 2 │ 1 female 2 own little moderate + 3 │ 2 male 1 own little NA + 4 │ 3 male 2 free little little + 5 │ 4 male 2 free little little ⋯ + 6 │ 5 male 1 free NA NA + 7 │ 6 male 2 own quite rich NA + 8 │ 7 male 3 rent little moderate + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 994 │ 993 male 3 own little little ⋯ + 995 │ 994 male 2 own NA NA + 996 │ 995 female 1 own little NA + 997 │ 996 male 3 own little little + 998 │ 997 male 2 own little NA ⋯ + 999 │ 998 male 2 free little little + 1000 │ 999 male 2 own moderate moderate 3 columns and 985 rows omitted ``` @@ -1718,26 +1726,26 @@ as we have described above: ```jldoctest dataframe julia> select(german, r"S") 1000×2 DataFrame - Row │ Sex Saving accounts 
- │ InlineSt… InlineString15… -──────┼──────────────────────────── - 1 │ male NA - 2 │ female little - 3 │ male little - 4 │ male little - 5 │ male little - 6 │ male NA - 7 │ male quite rich - 8 │ male little - ⋮ │ ⋮ ⋮ - 994 │ male little - 995 │ male NA - 996 │ female little - 997 │ male little - 998 │ male little - 999 │ male little - 1000 │ male moderate - 985 rows omitted + Row │ Sex Saving accounts + │ String7 String15 +──────┼────────────────────────── + 1 │ male NA + 2 │ female little + 3 │ male little + 4 │ male little + 5 │ male little + 6 │ male NA + 7 │ male quite rich + 8 │ male little + ⋮ │ ⋮ ⋮ + 994 │ male little + 995 │ male NA + 996 │ female little + 997 │ male little + 998 │ male little + 999 │ male little + 1000 │ male moderate + 985 rows omitted ``` The benefit of `select` or `combine` over indexing is that it is easier @@ -1746,26 +1754,26 @@ to combine several column selectors, e.g.: ```jldoctest dataframe julia> select(german, r"S", "Job", 1) 1000×4 DataFrame - Row │ Sex Saving accounts Job id - │ InlineSt… InlineString15… Int64 Int64 -──────┼────────────────────────────────────────── - 1 │ male NA 2 0 - 2 │ female little 2 1 - 3 │ male little 1 2 - 4 │ male little 2 3 - 5 │ male little 2 4 - 6 │ male NA 1 5 - 7 │ male quite rich 2 6 - 8 │ male little 3 7 - ⋮ │ ⋮ ⋮ ⋮ ⋮ - 994 │ male little 3 993 - 995 │ male NA 2 994 - 996 │ female little 1 995 - 997 │ male little 3 996 - 998 │ male little 2 997 - 999 │ male little 2 998 - 1000 │ male moderate 2 999 - 985 rows omitted + Row │ Sex Saving accounts Job id + │ String7 String15 Int64 Int64 +──────┼──────────────────────────────────────── + 1 │ male NA 2 0 + 2 │ female little 2 1 + 3 │ male little 1 2 + 4 │ male little 2 3 + 5 │ male little 2 4 + 6 │ male NA 1 5 + 7 │ male quite rich 2 6 + 8 │ male little 3 7 + ⋮ │ ⋮ ⋮ ⋮ ⋮ + 994 │ male little 3 993 + 995 │ male NA 2 994 + 996 │ female little 1 995 + 997 │ male little 3 996 + 998 │ male little 2 997 + 999 │ male little 2 998 + 1000 │ male moderate 2 
999 + 985 rows omitted ``` Taking advantage of this flexibility here is an idiomatic pattern to move some column to the front of a data frame: @@ -1773,25 +1781,25 @@ Taking advantage of this flexibility here is an idiomatic pattern to move some c ```jldoctest dataframe julia> select(german, "Sex", :) 1000×10 DataFrame - Row │ Sex id Age Job Housing Saving accounts Checking a ⋯ - │ InlineSt… Int64 Int64 Int64 InlineSt… InlineString15… InlineStri ⋯ + Row │ Sex id Age Job Housing Saving accounts Checking accou ⋯ + │ String7 Int64 Int64 Int64 String7 String15 String15 ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1 │ male 0 67 2 own NA little ⋯ - 2 │ female 1 22 2 own little moderate - 3 │ male 2 49 1 own little NA - 4 │ male 3 45 2 free little little - 5 │ male 4 53 2 free little little ⋯ - 6 │ male 5 35 1 free NA NA - 7 │ male 6 53 2 own quite rich NA - 8 │ male 7 35 3 rent little moderate - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 994 │ male 993 30 3 own little little ⋯ - 995 │ male 994 50 2 own NA NA - 996 │ female 995 31 1 own little NA - 997 │ male 996 40 3 own little little - 998 │ male 997 38 2 own little NA ⋯ - 999 │ male 998 23 2 free little little - 1000 │ male 999 27 2 own moderate moderate + 1 │ male 0 67 2 own NA little ⋯ + 2 │ female 1 22 2 own little moderate + 3 │ male 2 49 1 own little NA + 4 │ male 3 45 2 free little little + 5 │ male 4 53 2 free little little ⋯ + 6 │ male 5 35 1 free NA NA + 7 │ male 6 53 2 own quite rich NA + 8 │ male 7 35 3 rent little moderate + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 994 │ male 993 30 3 own little little ⋯ + 995 │ male 994 50 2 own NA NA + 996 │ female 995 31 1 own little NA + 997 │ male 996 40 3 own little little + 998 │ male 997 38 2 own little NA ⋯ + 999 │ male 998 23 2 free little little + 1000 │ male 999 27 2 own moderate moderate 4 columns and 985 rows omitted ``` @@ -1801,26 +1809,26 @@ Below, we are simply passing source column and target column name to rename them ```jldoctest dataframe julia> 
select(german, :Sex => :x1, :Age => :x2) 1000×2 DataFrame - Row │ x1 x2 - │ InlineSt… Int64 -──────┼────────────────── - 1 │ male 67 - 2 │ female 22 - 3 │ male 49 - 4 │ male 45 - 5 │ male 53 - 6 │ male 35 - 7 │ male 53 - 8 │ male 35 - ⋮ │ ⋮ ⋮ - 994 │ male 30 - 995 │ male 50 - 996 │ female 31 - 997 │ male 40 - 998 │ male 38 - 999 │ male 23 - 1000 │ male 27 - 985 rows omitted + Row │ x1 x2 + │ String7 Int64 +──────┼──────────────── + 1 │ male 67 + 2 │ female 22 + 3 │ male 49 + 4 │ male 45 + 5 │ male 53 + 6 │ male 35 + 7 │ male 53 + 8 │ male 35 + ⋮ │ ⋮ ⋮ + 994 │ male 30 + 995 │ male 50 + 996 │ female 31 + 997 │ male 40 + 998 │ male 38 + 999 │ male 23 + 1000 │ male 27 + 985 rows omitted ``` It is important to note that `select` always returns a data frame, even if a single column selected @@ -1881,8 +1889,8 @@ the `copycols=false` keyword argument: julia> df = select(german, :Sex) 1000×1 DataFrame Row │ Sex - │ InlineSt… -──────┼─────────── + │ String7 +──────┼───────── 1 │ male 2 │ female 3 │ male @@ -1891,7 +1899,7 @@ julia> df = select(german, :Sex) 6 │ male 7 │ male 8 │ male - ⋮ │ ⋮ + ⋮ │ ⋮ 994 │ male 995 │ male 996 │ female @@ -1899,7 +1907,7 @@ julia> df = select(german, :Sex) 998 │ male 999 │ male 1000 │ male - 985 rows omitted +985 rows omitted julia> df.Sex === german.Sex # copy false @@ -1907,8 +1915,8 @@ false julia> df = select(german, :Sex, copycols=false) 1000×1 DataFrame Row │ Sex - │ InlineSt… -──────┼─────────── + │ String7 +──────┼───────── 1 │ male 2 │ female 3 │ male @@ -1917,7 +1925,7 @@ julia> df = select(german, :Sex, copycols=false) 6 │ male 7 │ male 8 │ male - ⋮ │ ⋮ + ⋮ │ ⋮ 994 │ male 995 │ male 996 │ female @@ -1925,7 +1933,7 @@ julia> df = select(german, :Sex, copycols=false) 998 │ male 999 │ male 1000 │ male - 985 rows omitted +985 rows omitted julia> df.Sex === german.Sex # no-copy is performed true @@ -1938,25 +1946,25 @@ julia> select!(german, Not(:Age)); julia> german 1000×9 DataFrame - Row │ id Sex Job Housing Saving accounts Checking 
account ⋯ - │ Int64 InlineSt… Int64 InlineSt… InlineString15… InlineString15… ⋯ + Row │ id Sex Job Housing Saving accounts Checking account Cre ⋯ + │ Int64 String7 Int64 String7 String15 String15 Int ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1 │ 0 male 2 own NA little ⋯ - 2 │ 1 female 2 own little moderate - 3 │ 2 male 1 own little NA - 4 │ 3 male 2 free little little - 5 │ 4 male 2 free little little ⋯ - 6 │ 5 male 1 free NA NA - 7 │ 6 male 2 own quite rich NA - 8 │ 7 male 3 rent little moderate - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 994 │ 993 male 3 own little little ⋯ - 995 │ 994 male 2 own NA NA - 996 │ 995 female 1 own little NA - 997 │ 996 male 3 own little little - 998 │ 997 male 2 own little NA ⋯ - 999 │ 998 male 2 free little little - 1000 │ 999 male 2 own moderate moderate + 1 │ 0 male 2 own NA little ⋯ + 2 │ 1 female 2 own little moderate + 3 │ 2 male 1 own little NA + 4 │ 3 male 2 free little little + 5 │ 4 male 2 free little little ⋯ + 6 │ 5 male 1 free NA NA + 7 │ 6 male 2 own quite rich NA + 8 │ 7 male 3 rent little moderate + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 994 │ 993 male 3 own little little ⋯ + 995 │ 994 male 2 own NA NA + 996 │ 995 female 1 own little NA + 997 │ 996 male 3 own little little + 998 │ 997 male 2 own little NA ⋯ + 999 │ 998 male 2 free little little + 1000 │ 999 male 2 own moderate moderate 3 columns and 985 rows omitted ``` @@ -1970,31 +1978,31 @@ julia> german = copy(german_ref); julia> df = german_ref[1:8, 1:5] 8×5 DataFrame - Row │ id Age Sex Job Housing - │ Int64 Int64 InlineSt… Int64 InlineSt… -─────┼─────────────────────────────────────────── - 1 │ 0 67 male 2 own - 2 │ 1 22 female 2 own - 3 │ 2 49 male 1 own - 4 │ 3 45 male 2 free - 5 │ 4 53 male 2 free - 6 │ 5 35 male 1 free - 7 │ 6 53 male 2 own - 8 │ 7 35 male 3 rent + Row │ id Age Sex Job Housing + │ Int64 Int64 String7 Int64 String7 +─────┼─────────────────────────────────────── + 1 │ 0 67 male 2 own + 2 │ 1 22 female 2 own + 3 │ 2 49 male 1 own + 4 │ 3 45 
male 2 free + 5 │ 4 53 male 2 free + 6 │ 5 35 male 1 free + 7 │ 6 53 male 2 own + 8 │ 7 35 male 3 rent julia> transform(df, :Age => maximum) 8×6 DataFrame - Row │ id Age Sex Job Housing Age_maximum - │ Int64 Int64 InlineSt… Int64 InlineSt… Int64 -─────┼──────────────────────────────────────────────────────── - 1 │ 0 67 male 2 own 67 - 2 │ 1 22 female 2 own 67 - 3 │ 2 49 male 1 own 67 - 4 │ 3 45 male 2 free 67 - 5 │ 4 53 male 2 free 67 - 6 │ 5 35 male 1 free 67 - 7 │ 6 53 male 2 own 67 - 8 │ 7 35 male 3 rent 67 + Row │ id Age Sex Job Housing Age_maximum + │ Int64 Int64 String7 Int64 String7 Int64 +─────┼──────────────────────────────────────────────────── + 1 │ 0 67 male 2 own 67 + 2 │ 1 22 female 2 own 67 + 3 │ 2 49 male 1 own 67 + 4 │ 3 45 male 2 free 67 + 5 │ 4 53 male 2 free 67 + 6 │ 5 35 male 1 free 67 + 7 │ 6 53 male 2 own 67 + 8 │ 7 35 male 3 rent 67 ``` In the example below we are swapping values stored in columns `:Sex` and `:Age`: @@ -2002,25 +2010,25 @@ In the example below we are swapping values stored in columns `:Sex` and `:Age`: ```jldoctest dataframe julia> transform(german, :Age => :Sex, :Sex => :Age) 1000×10 DataFrame - Row │ id Age Sex Job Housing Saving accounts Checking a ⋯ - │ Int64 InlineSt… Int64 Int64 InlineSt… InlineString15… InlineStri ⋯ + Row │ id Age Sex Job Housing Saving accounts Checking accou ⋯ + │ Int64 String7 Int64 Int64 String7 String15 String15 ⋯ ──────┼───────────────────────────────────────────────────────────────────────── - 1 │ 0 male 67 2 own NA little ⋯ - 2 │ 1 female 22 2 own little moderate - 3 │ 2 male 49 1 own little NA - 4 │ 3 male 45 2 free little little - 5 │ 4 male 53 2 free little little ⋯ - 6 │ 5 male 35 1 free NA NA - 7 │ 6 male 53 2 own quite rich NA - 8 │ 7 male 35 3 rent little moderate - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 994 │ 993 male 30 3 own little little ⋯ - 995 │ 994 male 50 2 own NA NA - 996 │ 995 female 31 1 own little NA - 997 │ 996 male 40 3 own little little - 998 │ 997 male 38 2 own little NA ⋯ - 999 │ 998 
male 23 2 free little little - 1000 │ 999 male 27 2 own moderate moderate + 1 │ 0 male 67 2 own NA little ⋯ + 2 │ 1 female 22 2 own little moderate + 3 │ 2 male 49 1 own little NA + 4 │ 3 male 45 2 free little little + 5 │ 4 male 53 2 free little little ⋯ + 6 │ 5 male 35 1 free NA NA + 7 │ 6 male 53 2 own quite rich NA + 8 │ 7 male 35 3 rent little moderate + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 994 │ 993 male 30 3 own little little ⋯ + 995 │ 994 male 50 2 own NA NA + 996 │ 995 female 31 1 own little NA + 997 │ 996 male 40 3 own little little + 998 │ 997 male 38 2 own little NA ⋯ + 999 │ 998 male 23 2 free little little + 1000 │ 999 male 27 2 own moderate moderate 4 columns and 985 rows omitted ``` diff --git a/docs/src/man/querying_frameworks.md b/docs/src/man/querying_frameworks.md index a2cd55f4d8..771a378bb1 100644 --- a/docs/src/man/querying_frameworks.md +++ b/docs/src/man/querying_frameworks.md @@ -24,10 +24,10 @@ The major benefit of the package is that it allows you to refer to columns of a expressions. Additionally you can chain a sequence of transformations of a `DataFrame` using the `@linq` macro. -When a DataFramesMeta.jl macro such as `@select`, `@transform`, `@by`, `@combine`, +When a DataFramesMeta.jl macro such as `@select`, `@transform`, `@by`, `@combine`, `@where`, or `@orderby` is called inside a `@linq` block, you can omit the `@`. Therefore `transform` inside `@linq` is not the same as `transform` -outside of a `@linq` block. +outside of a `@linq` block. Here is a minimal example of usage of the package. 
Observe that we refer to names of columns using only their names and that chaining is performed using the diff --git a/docs/src/man/reshaping_and_pivoting.md b/docs/src/man/reshaping_and_pivoting.md index 2122cfa67e..38068c89e0 100755 --- a/docs/src/man/reshaping_and_pivoting.md +++ b/docs/src/man/reshaping_and_pivoting.md @@ -10,7 +10,7 @@ julia> iris = CSV.read((joinpath(dirname(pathof(DataFrames)), DataFrame) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 InlineSt… + │ Float64 Float64 Float64 Float64 String15 ─────┼────────────────────────────────────────────────────────────────── 1 │ 5.1 3.5 1.4 0.2 Iris-setosa 2 │ 4.9 3.0 1.4 0.2 Iris-setosa @@ -33,7 +33,7 @@ julia> iris = CSV.read((joinpath(dirname(pathof(DataFrames)), julia> stack(iris, 1:4) 600×3 DataFrame Row │ Species variable value - │ InlineSt… String Float64 + │ String15 String Float64 ─────┼────────────────────────────────────── 1 │ Iris-setosa SepalLength 5.1 2 │ Iris-setosa SepalLength 4.9 @@ -62,7 +62,7 @@ be given: julia> stack(iris, [:SepalLength, :SepalWidth, :PetalLength, :PetalWidth]) 600×3 DataFrame Row │ Species variable value - │ InlineSt… String Float64 + │ String15 String Float64 ─────┼────────────────────────────────────── 1 │ Iris-setosa SepalLength 5.1 2 │ Iris-setosa SepalLength 4.9 @@ -100,7 +100,7 @@ the long format: julia> stack(iris, [:SepalLength, :SepalWidth], :Species) 300×3 DataFrame Row │ Species variable value - │ InlineSt… String Float64 + │ String15 String Float64 ─────┼────────────────────────────────────── 1 │ Iris-setosa SepalLength 5.1 2 │ Iris-setosa SepalLength 4.9 @@ -127,7 +127,7 @@ If you prefer to specify the id columns then use `Not` with `stack` like this: julia> stack(iris, Not(:Species)) 600×3 DataFrame Row │ Species variable value - │ InlineSt… String Float64 + │ String15 String Float64 ─────┼────────────────────────────────────── 1 │ Iris-setosa SepalLength 5.1 2 │ Iris-setosa SepalLength 4.9 
@@ -159,7 +159,7 @@ julia> iris.id = 1:size(iris, 1) julia> longdf = stack(iris, Not([:Species, :id])) 600×4 DataFrame Row │ Species id variable value - │ InlineSt… Int64 String Float64 + │ String15 Int64 String Float64 ─────┼───────────────────────────────────────────── 1 │ Iris-setosa 1 SepalLength 5.1 2 │ Iris-setosa 2 SepalLength 4.9 @@ -209,7 +209,7 @@ If the remaining columns are unique, you can skip the id variable and use: julia> unstack(longdf, :variable, :value) 150×6 DataFrame Row │ Species id SepalLength SepalWidth PetalLength PetalWidth ⋯ - │ InlineSt… Int64 Float64? Float64? Float64? Float64? ⋯ + │ String15 Int64 Float64? Float64? Float64? Float64? ⋯ ─────┼────────────────────────────────────────────────────────────────────────── 1 │ Iris-setosa 1 5.1 3.5 1.4 0.2 ⋯ 2 │ Iris-setosa 2 4.9 3.0 1.4 0.2 @@ -236,7 +236,7 @@ arguments, as they will be used by default, and write: julia> unstack(longdf) 150×6 DataFrame Row │ Species id SepalLength SepalWidth PetalLength PetalWidth ⋯ - │ InlineSt… Int64 Float64? Float64? Float64? Float64? ⋯ + │ String15 Int64 Float64? Float64? Float64? Float64? ⋯ ─────┼────────────────────────────────────────────────────────────────────────── 1 │ Iris-setosa 1 5.1 3.5 1.4 0.2 ⋯ 2 │ Iris-setosa 2 4.9 3.0 1.4 0.2 @@ -264,7 +264,7 @@ the original wide data frame. 
Here is an example: julia> stack(iris, view=true) 600×4 DataFrame Row │ Species id variable value - │ InlineSt… Int64 String Float64 + │ String15 Int64 String Float64 ─────┼───────────────────────────────────────────── 1 │ Iris-setosa 1 SepalLength 5.1 2 │ Iris-setosa 2 SepalLength 4.9 @@ -306,7 +306,7 @@ julia> using Statistics julia> d = stack(iris, Not(:Species)) 750×3 DataFrame Row │ Species variable value - │ InlineSt… String Float64 + │ String15 String Float64 ─────┼────────────────────────────────────── 1 │ Iris-setosa SepalLength 5.1 2 │ Iris-setosa SepalLength 4.9 @@ -329,7 +329,7 @@ julia> d = stack(iris, Not(:Species)) julia> x = combine(groupby(d, [:variable, :Species]), :value => mean => :vsum) 15×3 DataFrame Row │ variable Species vsum - │ String InlineSt… Float64 + │ String String15 Float64 ─────┼─────────────────────────────────────── 1 │ SepalLength Iris-setosa 5.006 2 │ SepalLength Iris-versicolor 5.936 diff --git a/docs/src/man/sorting.md b/docs/src/man/sorting.md index d90e520bc1..c5601687b5 100644 --- a/docs/src/man/sorting.md +++ b/docs/src/man/sorting.md @@ -10,7 +10,7 @@ julia> iris = CSV.read((joinpath(dirname(pathof(DataFrames)), DataFrame) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 InlineSt… + │ Float64 Float64 Float64 Float64 String15 ─────┼────────────────────────────────────────────────────────────────── 1 │ 5.1 3.5 1.4 0.2 Iris-setosa 2 │ 4.9 3.0 1.4 0.2 Iris-setosa @@ -33,7 +33,7 @@ julia> iris = CSV.read((joinpath(dirname(pathof(DataFrames)), julia> sort!(iris) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 InlineSt… + │ Float64 Float64 Float64 Float64 String15 ─────┼────────────────────────────────────────────────────────────────── 1 │ 4.3 3.0 1.1 0.1 Iris-setosa 2 │ 4.4 2.9 1.4 0.2 Iris-setosa @@ -65,7 +65,7 @@ Here are some examples showing most of the possible options: julia> sort!(iris, rev = 
true) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 InlineSt… + │ Float64 Float64 Float64 Float64 String15 ─────┼────────────────────────────────────────────────────────────────── 1 │ 7.9 3.8 6.4 2.0 Iris-virginica 2 │ 7.7 3.8 6.7 2.2 Iris-virginica @@ -88,7 +88,7 @@ julia> sort!(iris, rev = true) julia> sort!(iris, [:Species, :SepalWidth]) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 InlineSt… + │ Float64 Float64 Float64 Float64 String15 ─────┼────────────────────────────────────────────────────────────────── 1 │ 4.5 2.3 1.3 0.3 Iris-setosa 2 │ 4.4 2.9 1.4 0.2 Iris-setosa @@ -111,7 +111,7 @@ julia> sort!(iris, [:Species, :SepalWidth]) julia> sort!(iris, [order(:Species, by=length), order(:SepalLength, rev=true)]) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 InlineSt… + │ Float64 Float64 Float64 Float64 String15 ─────┼─────────────────────────────────────────────────────────────────── 1 │ 5.8 4.0 1.2 0.2 Iris-setosa 2 │ 5.7 3.8 1.7 0.3 Iris-setosa @@ -148,7 +148,7 @@ rows will be sorted by increasing `:PetalLength`: julia> sort!(iris, [:Species, :PetalLength], rev=(true, false)) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 InlineSt… + │ Float64 Float64 Float64 Float64 String15 ─────┼────────────────────────────────────────────────────────────────── 1 │ 4.9 2.5 4.5 1.7 Iris-virginica 2 │ 6.2 2.8 4.8 1.8 Iris-virginica @@ -171,7 +171,7 @@ julia> sort!(iris, [:Species, :PetalLength], rev=(true, false)) julia> sort!(iris, [order(:Species, rev=true), :PetalLength]) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 InlineSt… + │ Float64 Float64 Float64 Float64 String15 ─────┼────────────────────────────────────────────────────────────────── 1 │ 4.9 
2.5 4.5 1.7 Iris-virginica 2 │ 6.2 2.8 4.8 1.8 Iris-virginica diff --git a/docs/src/man/split_apply_combine.md b/docs/src/man/split_apply_combine.md index ab9be0d0ac..66f2954ad1 100644 --- a/docs/src/man/split_apply_combine.md +++ b/docs/src/man/split_apply_combine.md @@ -166,7 +166,7 @@ julia> iris = CSV.read((joinpath(dirname(pathof(DataFrames)), DataFrame) 150×5 DataFrame Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 InlineSt… + │ Float64 Float64 Float64 Float64 String15 ─────┼────────────────────────────────────────────────────────────────── 1 │ 5.1 3.5 1.4 0.2 Iris-setosa 2 │ 4.9 3.0 1.4 0.2 Iris-setosa @@ -190,7 +190,7 @@ julia> gdf = groupby(iris, :Species) GroupedDataFrame with 3 groups based on key: Species First Group (50 rows): Species = "Iris-setosa" Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 InlineSt… + │ Float64 Float64 Float64 Float64 String15 ─────┼─────────────────────────────────────────────────────────────── 1 │ 5.1 3.5 1.4 0.2 Iris-setosa 2 │ 4.9 3.0 1.4 0.2 Iris-setosa @@ -213,7 +213,7 @@ First Group (50 rows): Species = "Iris-setosa" ⋮ Last Group (50 rows): Species = "Iris-virginica" Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 InlineSt… + │ Float64 Float64 Float64 Float64 String15 ─────┼────────────────────────────────────────────────────────────────── 1 │ 6.3 3.3 6.0 2.5 Iris-virginica 2 │ 5.8 2.7 5.1 1.9 Iris-virginica @@ -237,7 +237,7 @@ Last Group (50 rows): Species = "Iris-virginica" julia> combine(gdf, :PetalLength => mean) 3×2 DataFrame Row │ Species PetalLength_mean - │ InlineSt… Float64 + │ String15 Float64 ─────┼─────────────────────────────────── 1 │ Iris-setosa 1.464 2 │ Iris-versicolor 4.26 @@ -246,7 +246,7 @@ julia> combine(gdf, :PetalLength => mean) julia> combine(gdf, nrow) 3×2 DataFrame Row │ Species nrow - │ InlineSt… Int64 + │ String15 Int64 ─────┼──────────────────────── 
1 │ Iris-setosa 50 2 │ Iris-versicolor 50 @@ -255,7 +255,7 @@ julia> combine(gdf, nrow) julia> combine(gdf, nrow, :PetalLength => mean => :mean) 3×3 DataFrame Row │ Species nrow mean - │ InlineSt… Int64 Float64 + │ String15 Int64 Float64 ─────┼───────────────────────────────── 1 │ Iris-setosa 50 1.464 2 │ Iris-versicolor 50 4.26 @@ -265,7 +265,7 @@ julia> combine(gdf, [:PetalLength, :SepalLength] => ((p, s) -> (a=mean(p)/mean(s AsTable) # multiple columns are passed as arguments 3×3 DataFrame Row │ Species a b - │ InlineSt… Float64 Float64 + │ String15 Float64 Float64 ─────┼──────────────────────────────────── 1 │ Iris-setosa 0.292449 73.2 2 │ Iris-versicolor 0.717655 213.0 @@ -276,7 +276,7 @@ julia> combine(gdf, x -> std(x.PetalLength) / std(x.SepalLength)) # passing a NamedTuple 3×2 DataFrame Row │ Species PetalLength_SepalLength_function - │ InlineSt… Float64 + │ String15 Float64 ─────┼─────────────────────────────────────────────────── 1 │ Iris-setosa 0.492245 2 │ Iris-versicolor 0.910378 @@ -285,7 +285,7 @@ julia> combine(gdf, julia> combine(x -> std(x.PetalLength) / std(x.SepalLength), gdf) # passing a SubDataFrame 3×2 DataFrame Row │ Species x1 - │ InlineSt… Float64 + │ String15 Float64 ─────┼─────────────────────────── 1 │ Iris-setosa 0.492245 2 │ Iris-versicolor 0.910378 @@ -294,7 +294,7 @@ julia> combine(x -> std(x.PetalLength) / std(x.SepalLength), gdf) # passing a Su julia> combine(gdf, 1:2 => cor, nrow) 3×3 DataFrame Row │ Species SepalLength_SepalWidth_cor nrow - │ InlineSt… Float64 Int64 + │ String15 Float64 Int64 ─────┼──────────────────────────────────────────────────── 1 │ Iris-setosa 0.74678 50 2 │ Iris-versicolor 0.525911 50 @@ -303,7 +303,7 @@ julia> combine(gdf, 1:2 => cor, nrow) julia> combine(gdf, :PetalLength => (x -> [extrema(x)]) => [:min, :max]) 3×3 DataFrame Row │ Species min max - │ InlineSt… Float64 Float64 + │ String15 Float64 Float64 ─────┼─────────────────────────────────── 1 │ Iris-setosa 1.0 1.9 2 │ Iris-versicolor 3.0 5.1 @@ 
-356,7 +356,7 @@ julia> combine(gdf) do df end 3×3 DataFrame Row │ Species m s² - │ InlineSt… Float64 Float64 + │ String15 Float64 Float64 ─────┼───────────────────────────────────── 1 │ Iris-setosa 1.464 0.0301061 2 │ Iris-versicolor 4.26 0.220816 @@ -478,7 +478,7 @@ julia> gd = groupby(iris, :Species) GroupedDataFrame with 3 groups based on key: Species First Group (50 rows): Species = "Iris-setosa" Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 InlineSt… + │ Float64 Float64 Float64 Float64 String15 ─────┼─────────────────────────────────────────────────────────────── 1 │ 5.1 3.5 1.4 0.2 Iris-setosa 2 │ 4.9 3.0 1.4 0.2 Iris-setosa @@ -501,7 +501,7 @@ First Group (50 rows): Species = "Iris-setosa" ⋮ Last Group (50 rows): Species = "Iris-virginica" Row │ SepalLength SepalWidth PetalLength PetalWidth Species - │ Float64 Float64 Float64 Float64 InlineSt… + │ Float64 Float64 Float64 Float64 String15 ─────┼────────────────────────────────────────────────────────────────── 1 │ 6.3 3.3 6.0 2.5 Iris-virginica 2 │ 5.8 2.7 5.1 1.9 Iris-virginica @@ -525,7 +525,7 @@ Last Group (50 rows): Species = "Iris-virginica" julia> combine(gd, valuecols(gd) .=> mean) 3×5 DataFrame Row │ Species SepalLength_mean SepalWidth_mean PetalLength_mean P ⋯ - │ InlineSt… Float64 Float64 Float64 F ⋯ + │ String15 Float64 Float64 Float64 F ⋯ ─────┼────────────────────────────────────────────────────────────────────────── 1 │ Iris-setosa 5.006 3.418 1.464 ⋯ 2 │ Iris-versicolor 5.936 2.77 4.26 From d79aa5d5764a9018a0c4be48ab327cac6b3b7446 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 9 Sep 2021 11:04:00 +0200 Subject: [PATCH 3/9] fix type printing inconsistency --- NEWS.md | 5 +++++ src/abstractdataframe/show.jl | 15 ++++----------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index 07cce6bb69..328b7e0de0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -76,6 +76,11 @@ * fix a problem with 
`unstack` on empty data frame ([#2842](https://github.com/JuliaData/DataFrames.jl/issues/2842)) +## Other changes + +* Column element type now always only shows the type name (without module name prefix) + ([#2865](https://github.com/JuliaData/DataFrames.jl/issues/2865)) + # DataFrames.jl v1.2.2 Patch Release Notes ## Bug fixes diff --git a/src/abstractdataframe/show.jl b/src/abstractdataframe/show.jl index eeb31668a9..32d35fa10e 100644 --- a/src/abstractdataframe/show.jl +++ b/src/abstractdataframe/show.jl @@ -106,12 +106,12 @@ function compacttype(T::Type, maxwidth::Int=8) T === Any && return "Any" T === Missing && return "Missing" - sT = string(T) + sT = string(nameof(T)) textwidth(sT) ≤ maxwidth && return sT if T >: Missing T = nonmissingtype(T) - sT = string(T) + sT = string(nameof(T)) suffix = "?" textwidth(sT) ≤ maxwidth && return sT * suffix else @@ -122,17 +122,10 @@ function compacttype(T::Type, maxwidth::Int=8) # This is only type display shortening so we # are OK with any T whose name starts with CategoricalValue here - if startswith(sT, "CategoricalValue") || startswith(sT, "CategoricalArrays.CategoricalValue") - sT = string(nameof(T)) - if textwidth(sT) ≤ maxwidth - return sT * "…" * suffix - else - return (maxwidth ≥ 11 ? "Categorical…" : "Cat…") * suffix - end + if startswith(sT, "CategoricalValue") + return (maxwidth ≥ 11 ? 
"Categorical…" : "Cat…") * suffix elseif T isa Union return "Union…" * suffix - else - sT = string(nameof(T)) end cumwidth = 0 From 27fdc8870310073dda6095f23c7c5c30577ee5c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 9 Sep 2021 11:10:52 +0200 Subject: [PATCH 4/9] fix union case --- src/abstractdataframe/show.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/abstractdataframe/show.jl b/src/abstractdataframe/show.jl index 32d35fa10e..301ea0e24f 100644 --- a/src/abstractdataframe/show.jl +++ b/src/abstractdataframe/show.jl @@ -106,12 +106,12 @@ function compacttype(T::Type, maxwidth::Int=8) T === Any && return "Any" T === Missing && return "Missing" - sT = string(nameof(T)) + sT = string(T isa Union ? T : nameof(T)) textwidth(sT) ≤ maxwidth && return sT if T >: Missing T = nonmissingtype(T) - sT = string(nameof(T)) + sT = string(T isa Union ? T : nameof(T)) suffix = "?" textwidth(sT) ≤ maxwidth && return sT * suffix else From 4c21c804cf4cebc4feaf93bf0de0d5517061b914 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 9 Sep 2021 11:35:02 +0200 Subject: [PATCH 5/9] another attempt to fix things --- src/abstractdataframe/show.jl | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/abstractdataframe/show.jl b/src/abstractdataframe/show.jl index 301ea0e24f..5ed536f790 100644 --- a/src/abstractdataframe/show.jl +++ b/src/abstractdataframe/show.jl @@ -106,12 +106,12 @@ function compacttype(T::Type, maxwidth::Int=8) T === Any && return "Any" T === Missing && return "Missing" - sT = string(T isa Union ? T : nameof(T)) + sT = string(T) textwidth(sT) ≤ maxwidth && return sT if T >: Missing T = nonmissingtype(T) - sT = string(T isa Union ? T : nameof(T)) + sT = string(T) suffix = "?" 
textwidth(sT) ≤ maxwidth && return sT * suffix else @@ -122,12 +122,24 @@ function compacttype(T::Type, maxwidth::Int=8) # This is only type display shortening so we # are OK with any T whose name starts with CategoricalValue here - if startswith(sT, "CategoricalValue") - return (maxwidth ≥ 11 ? "Categorical…" : "Cat…") * suffix + if startswith(sT, "CategoricalValue") || startswith(sT, "CategoricalArrays.CategoricalValue") + sT = string(nameof(T)) + if textwidth(sT) ≤ maxwidth + return sT * "…" * suffix + else + return (maxwidth ≥ 11 ? "Categorical…" : "Cat…") * suffix + end elseif T isa Union return "Union…" * suffix + else + sTfull = sT + sT = string(nameof(T)) end + # handle the case when the type printed is not parametric but string(T) + # prefixed it with the module name which caused it to be overlong + textwidth(sT) ≤ maxwidth && endswith(sTfull, sT) && return sT + cumwidth = 0 stop = 0 for (i, c) in enumerate(sT) From 2edfa1e102110daae2f039b36b41b8f4a030355a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 9 Sep 2021 11:35:42 +0200 Subject: [PATCH 6/9] correct off-by-one issue in printing --- src/abstractdataframe/show.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/abstractdataframe/show.jl b/src/abstractdataframe/show.jl index 5ed536f790..735b0832eb 100644 --- a/src/abstractdataframe/show.jl +++ b/src/abstractdataframe/show.jl @@ -138,7 +138,7 @@ function compacttype(T::Type, maxwidth::Int=8) # handle the case when the type printed is not parametric but string(T) # prefixed it with the module name which caused it to be overlong - textwidth(sT) ≤ maxwidth && endswith(sTfull, sT) && return sT + textwidth(sT) ≤ maxwidth + 1 && endswith(sTfull, sT) && return sT cumwidth = 0 stop = 0 From 336f66ee6ffceef41f168aca0a2a211d67a430e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 9 Sep 2021 12:00:18 +0200 Subject: [PATCH 7/9] remove NEWS.md entry --- NEWS.md | 5 ----- 1 
file changed, 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 328b7e0de0..07cce6bb69 100644 --- a/NEWS.md +++ b/NEWS.md @@ -76,11 +76,6 @@ * fix a problem with `unstack` on empty data frame ([#2842](https://github.com/JuliaData/DataFrames.jl/issues/2842)) -## Other changes - -* Column element type now always only shows the type name (without module name prefix) - ([#2865](https://github.com/JuliaData/DataFrames.jl/issues/2865)) - # DataFrames.jl v1.2.2 Patch Release Notes ## Bug fixes From c6a99c282abcc9c60b1637afbaa6856334385c97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 9 Sep 2021 12:57:18 +0200 Subject: [PATCH 8/9] small updates in printing --- src/abstractdataframe/io.jl | 6 +++--- src/abstractdataframe/show.jl | 4 ++-- test/io.jl | 12 ++++++------ test/show.jl | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/abstractdataframe/io.jl b/src/abstractdataframe/io.jl index cb5adc0903..7f2f29f70e 100755 --- a/src/abstractdataframe/io.jl +++ b/src/abstractdataframe/io.jl @@ -49,7 +49,7 @@ function getmaxwidths(df::AbstractDataFrame, undefstrwidth = ourstrwidth(io, "#undef", buffer, truncstring) - ct = show_eltype ? batch_compacttype(Any[eltype(c) for c in eachcol(df)]) : String[] + ct = show_eltype ? batch_compacttype(Any[eltype(c) for c in eachcol(df)], 9) : String[] j = 1 for (col_idx, (name, col)) in enumerate(pairs(eachcol(df))) # (1) Consider length of column name @@ -211,7 +211,7 @@ function _show(io::IO, ::MIME"text/html", df::AbstractDataFrame; # which the users can hover over. The limit of 256 characters is arbitrary, but # we want some maximum limit, since the types can sometimes get really-really long. 
types = Any[eltype(df[!, idx]) for idx in 1:mxcol] - ct, ct_title = batch_compacttype(types), batch_compacttype(types, 256) + ct, ct_title = batch_compacttype(types, 9), batch_compacttype(types, 256) for j in 1:mxcol s = html_escape(ct[j]) title = html_escape(ct_title[j]) @@ -380,7 +380,7 @@ function _show(io::IO, ::MIME"text/latex", df::AbstractDataFrame; write(io, "\t\\hline\n") if eltypes write(io, "\t& ") - ct = batch_compacttype(Any[eltype(df[!, idx]) for idx in 1:mxcol]) + ct = batch_compacttype(Any[eltype(df[!, idx]) for idx in 1:mxcol], 9) header = join(latex_escape.(ct), " & ") write(io, header) mxcol < size(df, 2) && write(io, " & ") diff --git a/src/abstractdataframe/show.jl b/src/abstractdataframe/show.jl index 735b0832eb..1e216290b1 100644 --- a/src/abstractdataframe/show.jl +++ b/src/abstractdataframe/show.jl @@ -82,7 +82,7 @@ function batch_compacttype(types::Vector{Any}, maxwidths::Vector{Int}) end end -function batch_compacttype(types::Vector{Any}, maxwidth::Int=8) +function batch_compacttype(types::Vector{Any}, maxwidth::Int) cache = Dict{Type, String}() return map(types) do T get!(cache, T) do @@ -100,7 +100,7 @@ For displaying data frame we do not want string representation of type to be longer than `maxwidth`. This function implements rules how type names are cropped if they are longer than `maxwidth`. """ -function compacttype(T::Type, maxwidth::Int=8) +function compacttype(T::Type, maxwidth::Int) maxwidth = max(8, maxwidth) T === Any && return "Any" diff --git a/test/io.jl b/test/io.jl index cd07b9fb36..6c2db7095f 100644 --- a/test/io.jl +++ b/test/io.jl @@ -22,7 +22,7 @@ import Main: QuoteTestType \\begin{tabular}{r|ccccccc} \t& A & B & C & D & E & F & G\\\\ \t\\hline - \t& Int64 & String & String & Float64? & Cat…? & String & MD…\\\\ + \t& Int64 & String & String & Float64? & Cat…? 
& String & MD\\\\ \t\\hline \t1 & 1 & \\\$10.0 & A & 1.0 & a & \\emph{\\#undef} & \\href{http://juliadata.github.io/DataFrames.jl}{DataFrames.jl} \\\\ \t2 & 2 & M\\&F & B & 2.0 & \\emph{missing} & \\emph{\\#undef} & \\#\\#\\#A \\\\ @@ -167,7 +167,7 @@ end @test repr(MIME("text/html"), df) == "

4 rows × 2 columns

" * "" * - "" * + "" * "" * "" * "" * @@ -196,7 +196,7 @@ end "" * "" * "" * - "" * + "" * "" * "" * "" * @@ -290,7 +290,7 @@ end """ 8×2 DataFrame Row │ A B - │ Int64 MD… + │ Int64 MD ─────┼────────────────────────────────────────── 1 │ 1 DataFrames.jl (http://juliadat… 2 │ 4 \\frac{x^2}{x^2+y^2} @@ -305,7 +305,7 @@ end """ 8×2 DataFrame Row │ A B - │ Int64 MD… + │ Int64 MD ─────┼────────────────────────────────────────── 1 │ 1 DataFrames.jl (http://juliadat… 2 │ 4 \\frac{x^2}{x^2+y^2} @@ -355,7 +355,7 @@ end "

8 rows × 2 columns

" * "
AB
Int64MD…
Int64MD
11
24

###A

\n
StringAnyQuoteTe…QuoteTes…
" * "" * - "" * + "" * "" * "" * "
AB
Int64MD…
Int64MD
11
" * diff --git a/test/show.jl b/test/show.jl index b66af2d73c..0461b1e21f 100644 --- a/test/show.jl +++ b/test/show.jl @@ -301,8 +301,8 @@ end @test sprint(show, df) == """ 1×3 DataFrame Row │ a b c - │ Date DateTime Day - ─────┼──────────────────────────────────────── + │ Date DateTime Dates.Day + ─────┼──────────────────────────────────────────── 1 │ 2020-02-11 2020-02-11T15:00:00 1 day""" # Irrational From aeae38ba058ebf18fd3cc27d0d5371237c7d111d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 9 Sep 2021 14:05:10 +0200 Subject: [PATCH 9/9] small test update --- test/show.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/show.jl b/test/show.jl index 0461b1e21f..b66af2d73c 100644 --- a/test/show.jl +++ b/test/show.jl @@ -301,8 +301,8 @@ end @test sprint(show, df) == """ 1×3 DataFrame Row │ a b c - │ Date DateTime Dates.Day - ─────┼──────────────────────────────────────────── + │ Date DateTime Day + ─────┼──────────────────────────────────────── 1 │ 2020-02-11 2020-02-11T15:00:00 1 day""" # Irrational