From f6303ce324f401d9873023784b4476bdefb8460c Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Sun, 9 Apr 2023 12:57:58 -0400
Subject: [PATCH 01/15] progress
---
src/macros.jl | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/src/macros.jl b/src/macros.jl
index 51c5a221..f8dd2879 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -1459,11 +1459,17 @@ end
function transform_helper(x, args...)
- x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false)
+ a1 = first(args)
+ if is_macro_head(a1, "@when")
+ x, exprs, outer_flags, kw = get_df_args_kwargs(x, args[2:end]...; wrap_byrow = false)
+ t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
- t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
- quote
- $transform($x, $(t...); $(kw...))
+ z = subset_helper(:($copy($x)), a1.args[2:end]..., :(@kwarg view = true))
+
+ :($parent($transform!($z, $(t...); $(kw...))))
+ else
+ t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
+ :($transform($x, $(t...); $(kw...)))
end
end
From 8573e3c3e8138cfc8c1cf2b60fabf5751b34c923 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Fri, 15 Dec 2023 14:03:27 -0500
Subject: [PATCH 02/15] toying
---
src/macros.jl | 30 +++++++++++++++++++++++++++---
test/when.jl | 10 ++++++++++
2 files changed, 37 insertions(+), 3 deletions(-)
create mode 100644 test/when.jl
diff --git a/src/macros.jl b/src/macros.jl
index dc9b86ae..42df418b 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -1457,17 +1457,41 @@ end
##
##############################################################################
+function contains_when(args...)
+ for arg in args
+ if is_macro_head(arg, "@when")
+ return true
+ end
+ end
+ return false
+end
+
+function when_helper(x, args...)
+ x, exprs, outer_flags, kw = get_df_args_kwargs(x, args[2:end]...; wrap_byrow = false)
+ t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
+
+ z = subset_helper(:($copy($x)), a1.args[2:end]..., :(@kwarg view = true))
+
+ :($parent($transform!($z, $(t...); $(kw...))))
+end
function transform_helper(x, args...)
a1 = first(args)
- if is_macro_head(a1, "@when")
+ when = is_macro_head(a1, "@when")
+ rwhen = is_macro_head(a1, "@rwhen")
+ if when || rwhen
+ if when
+ z = subset_helper(:($copy($x)), a1.args[2:end]..., :(@kwarg view = true))
+ else #rwhen
+ z = rsubset_helper(:($copy($x)), a1.args[2:end]..., :(@kwarg view = true))
+ end
+
x, exprs, outer_flags, kw = get_df_args_kwargs(x, args[2:end]...; wrap_byrow = false)
t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
- z = subset_helper(:($copy($x)), a1.args[2:end]..., :(@kwarg view = true))
-
:($parent($transform!($z, $(t...); $(kw...))))
else
+ x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false)
t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
:($transform($x, $(t...); $(kw...)))
end
diff --git a/test/when.jl b/test/when.jl
new file mode 100644
index 00000000..04f69554
--- /dev/null
+++ b/test/when.jl
@@ -0,0 +1,10 @@
+module TestWhen
+
+using Test
+using DataFrames
+using DataFramesMeta
+using Statistics
+
+const ≅ = isequal
+
+end # module
\ No newline at end of file
From 2f5f49a3d2f60e8ef1e75a1eae054de1ac5d61c2 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Tue, 19 Dec 2023 09:55:46 -0500
Subject: [PATCH 03/15] implementation
---
src/macros.jl | 93 ++++++++++++++++++++++++++++----------------------
src/parsing.jl | 3 +-
2 files changed, 54 insertions(+), 42 deletions(-)
diff --git a/src/macros.jl b/src/macros.jl
index 42df418b..63f62c19 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -1456,45 +1456,10 @@ end
## transform & @transform
##
##############################################################################
-
-function contains_when(args...)
- for arg in args
- if is_macro_head(arg, "@when")
- return true
- end
- end
- return false
-end
-
-function when_helper(x, args...)
- x, exprs, outer_flags, kw = get_df_args_kwargs(x, args[2:end]...; wrap_byrow = false)
- t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
-
- z = subset_helper(:($copy($x)), a1.args[2:end]..., :(@kwarg view = true))
-
- :($parent($transform!($z, $(t...); $(kw...))))
-end
-
function transform_helper(x, args...)
- a1 = first(args)
- when = is_macro_head(a1, "@when")
- rwhen = is_macro_head(a1, "@rwhen")
- if when || rwhen
- if when
- z = subset_helper(:($copy($x)), a1.args[2:end]..., :(@kwarg view = true))
- else #rwhen
- z = rsubset_helper(:($copy($x)), a1.args[2:end]..., :(@kwarg view = true))
- end
-
- x, exprs, outer_flags, kw = get_df_args_kwargs(x, args[2:end]...; wrap_byrow = false)
- t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
-
- :($parent($transform!($z, $(t...); $(kw...))))
- else
- x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false)
- t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
- :($transform($x, $(t...); $(kw...)))
- end
+ x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false)
+ t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
+ :($transform($x, $(t...); $(kw...)))
end
"""
@@ -1622,12 +1587,58 @@ macro transform(x, args...)
esc(transform_helper(x, args...))
end
+omit_nested_when(ex, when = Ref(false)) = ex, when
+function omit_nested_when(ex::Expr, when = Ref(false))
+ if ex.head == :macrocall && ex.args[1] in keys(DEFAULT_FLAGS)
+ macroname = ex.args[1]
+ if macroname == WHEN_SYM
+ when[] = true
+ return omit_nested_when(MacroTools.unblock(ex.args[3]), when)
+ else
+ new_expr, when = omit_nested_when(MacroTools.unblock(ex.args[3]), when)
+ ex.args[3] = new_expr
+ end
+ end
+ return ex, when
+end
+
+function get_when_statements(exprs)
+ new_exprs = []
+ when_statements = []
+ seen_non_when = false
+ for expr in exprs
+ e, when = omit_nested_when(expr)
+ if when[]
+ if seen_non_when
+ throw(ArgumentError("All @when statements must come first"))
+ end
+ push!(when_statements, e)
+ else
+ seen_non_when = true
+ push!(new_exprs, expr)
+ end
+ end
+
+ new_exprs, when_statements
+end
+
function rtransform_helper(x, args...)
x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = true)
- t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs)
- quote
- $transform($x, $(t...); $(kw...))
+ exprs, whens = get_when_statements(exprs)
+ if !isempty(whens)
+ w = (fun_to_vec(ex; no_dest = true, gensym_names=false, outer_flags=outer_flags) for ex in whens)
+ t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs)
+ z = gensym()
+ quote
+ $z = $subset($copy($x), $(w...); view = true)
+ $parent($transform!($z, $(t...); $(kw...)))
+ end
+ else
+ t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs)
+ quote
+ $transform($x, $(t...); $(kw...))
+ end
end
end
diff --git a/src/parsing.jl b/src/parsing.jl
index 4f1b9e00..99606f78 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -167,7 +167,8 @@ is_macro_head(ex::Expr, name) = ex.head == :macrocall && ex.args[1] == Symbol(na
const BYROW_SYM = Symbol("@byrow")
const PASSMISSING_SYM = Symbol("@passmissing")
const ASTABLE_SYM = Symbol("@astable")
-const DEFAULT_FLAGS = (;BYROW_SYM => Ref(false), PASSMISSING_SYM => Ref(false), ASTABLE_SYM => Ref(false))
+const WHEN_SYM = Symbol("@when")
+const DEFAULT_FLAGS = (;BYROW_SYM => Ref(false), PASSMISSING_SYM => Ref(false), ASTABLE_SYM => Ref(false), WHEN_SYM => Ref(false))
extract_macro_flags(ex, exprflags = deepcopy(DEFAULT_FLAGS)) = (ex, exprflags)
function extract_macro_flags(ex::Expr, exprflags = deepcopy(DEFAULT_FLAGS))
From e7c1a01bcea9f563a7641d1205dacb2493b9c9b2 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Tue, 19 Dec 2023 10:25:26 -0500
Subject: [PATCH 04/15] simplify implementation
---
src/macros.jl | 28 ++++++++++++++++++++++------
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/src/macros.jl b/src/macros.jl
index 63f62c19..26ea3b15 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -1622,26 +1622,42 @@ function get_when_statements(exprs)
new_exprs, when_statements
end
-function rtransform_helper(x, args...)
- x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = true)
+function generic_transform_helper(x, args...; wrap_byrow::Bool = false, modify::Bool = false)
+ if modify == true
+ transformfun = transform!
+ else
+ transformfun = transform
+ end
+ x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = wrap_byrow)
exprs, whens = get_when_statements(exprs)
if !isempty(whens)
w = (fun_to_vec(ex; no_dest = true, gensym_names=false, outer_flags=outer_flags) for ex in whens)
t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs)
z = gensym()
- quote
- $z = $subset($copy($x), $(w...); view = true)
- $parent($transform!($z, $(t...); $(kw...)))
+ if modify
+ quote
+ $z = $subset($x, $(w...); view = true)
+ $parent($transform!($z, $(t...); $(kw...)))
+ end
+ else
+ quote
+ $z = $subset($copy($x), $(w...); view = true)
+ $parent($transform!($z, $(t...); $(kw...)))
+ end
end
else
t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs)
quote
- $transform($x, $(t...); $(kw...))
+ $transformfun($x, $(t...); $(kw...))
end
end
end
+function rtransform_helper(x, args...)
+ generic_transform_helper(x, args...; wrap_byrow = true, modify = false)
+end
+
"""
@rtransform(x, args...; kwargs...)
From 54a497f7f50998c66d67b5942898f855ba7d271b Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Tue, 19 Dec 2023 12:33:00 -0500
Subject: [PATCH 05/15] add for with
---
src/macros.jl | 175 +++++++++++++++++++------------------------------
src/parsing.jl | 44 ++++++++++++-
2 files changed, 109 insertions(+), 110 deletions(-)
diff --git a/src/macros.jl b/src/macros.jl
index 26ea3b15..931dff8d 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -547,11 +547,24 @@ getsinglecolumn(df, s) = throw(ArgumentError("Only indexing with Symbols, string
"is currently allowed with $DOLLAR"))
function with_helper(d, body)
+ body, outer_flags = extract_macro_flags(body)
+ if body isa Expr && body.head == :block
+ es, whens = get_when_statements(MacroTools.rmlines(body).args)
+ end
# Make body an expression to force the
# complicated method of fun_to_vec
# in the case of QuoteNode
- t = fun_to_vec(Expr(:block, body); no_dest=true)
- :(DataFramesMeta.exec($d, $t))
+ t = fun_to_vec(Expr(:block, es...); no_dest=true, outer_flags = outer_flags)
+ if !isempty(whens)
+ w = (fun_to_vec(ex; no_dest = true, gensym_names=false, outer_flags = outer_flags) for ex in whens)
+ z = gensym()
+ quote
+ $z = $subset($d, $(w...); view = true)
+ $exec($z, $t)
+ end
+ else
+ :($exec($d, $t))
+ end
end
"""
@@ -1456,10 +1469,51 @@ end
## transform & @transform
##
##############################################################################
+function generic_transform_select_helper(x, args...; wrap_byrow::Bool = false, modify::Bool = false, selectfun::Bool = false)
+ if selectfun
+ secondstagefun = select!
+ if modify
+ transformfun = select!
+ else
+ transformfun = select
+ end
+ else
+ secondstagefun = transform!
+ if modify
+ transformfun = transform!
+ else
+ transformfun = transform
+ end
+ end
+
+ x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = wrap_byrow)
+
+ exprs, whens = get_when_statements(exprs)
+ if !isempty(whens)
+ w = (fun_to_vec(ex; no_dest = true, gensym_names=false, outer_flags=outer_flags) for ex in whens)
+ t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs)
+ z = gensym()
+ if modify
+ quote
+ $z = $subset($x, $(w...); view = true)
+ $parent($secondstagefun($z, $(t...); $(kw...)))
+ end
+ else
+ quote
+ $z = $subset($copy($x), $(w...); view = true)
+ $parent($secondstagefun($z, $(t...); $(kw...)))
+ end
+ end
+ else
+ t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs)
+ quote
+ $transformfun($x, $(t...); $(kw...))
+ end
+ end
+end
+
function transform_helper(x, args...)
- x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false)
- t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
- :($transform($x, $(t...); $(kw...)))
+ generic_transform_select_helper(x, args...; wrap_byrow = false, modify = false)
end
"""
@@ -1587,75 +1641,8 @@ macro transform(x, args...)
esc(transform_helper(x, args...))
end
-omit_nested_when(ex, when = Ref(false)) = ex, when
-function omit_nested_when(ex::Expr, when = Ref(false))
- if ex.head == :macrocall && ex.args[1] in keys(DEFAULT_FLAGS)
- macroname = ex.args[1]
- if macroname == WHEN_SYM
- when[] = true
- return omit_nested_when(MacroTools.unblock(ex.args[3]), when)
- else
- new_expr, when = omit_nested_when(MacroTools.unblock(ex.args[3]), when)
- ex.args[3] = new_expr
- end
- end
- return ex, when
-end
-
-function get_when_statements(exprs)
- new_exprs = []
- when_statements = []
- seen_non_when = false
- for expr in exprs
- e, when = omit_nested_when(expr)
- if when[]
- if seen_non_when
- throw(ArgumentError("All @when statements must come first"))
- end
- push!(when_statements, e)
- else
- seen_non_when = true
- push!(new_exprs, expr)
- end
- end
-
- new_exprs, when_statements
-end
-
-function generic_transform_helper(x, args...; wrap_byrow::Bool = false, modify::Bool = false)
- if modify == true
- transformfun = transform!
- else
- transformfun = transform
- end
- x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = wrap_byrow)
-
- exprs, whens = get_when_statements(exprs)
- if !isempty(whens)
- w = (fun_to_vec(ex; no_dest = true, gensym_names=false, outer_flags=outer_flags) for ex in whens)
- t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs)
- z = gensym()
- if modify
- quote
- $z = $subset($x, $(w...); view = true)
- $parent($transform!($z, $(t...); $(kw...)))
- end
- else
- quote
- $z = $subset($copy($x), $(w...); view = true)
- $parent($transform!($z, $(t...); $(kw...)))
- end
- end
- else
- t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs)
- quote
- $transformfun($x, $(t...); $(kw...))
- end
- end
-end
-
function rtransform_helper(x, args...)
- generic_transform_helper(x, args...; wrap_byrow = true, modify = false)
+ generic_transform_select_helper(x, args...; wrap_byrow = true, modify = false)
end
"""
@@ -1703,12 +1690,7 @@ end
function transform!_helper(x, args...)
- x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false)
-
- t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
- quote
- $transform!($x, $(t...); $(kw...))
- end
+ generic_transform_select_helper(x, args...; wrap_byrow = false, modify = true)
end
"""
@@ -1817,12 +1799,7 @@ macro transform!(x, args...)
end
function rtransform!_helper(x, args...)
- x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = true)
-
- t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs)
- quote
- $transform!($x, $(t...); $(kw...))
- end
+ generic_transform_select_helper(x, args...; wrap_byrow = true, modify = true)
end
"""
@@ -1841,12 +1818,7 @@ end
##############################################################################
function select_helper(x, args...)
- x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false)
-
- t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
- quote
- $select($x, $(t...); $(kw...))
- end
+ generic_transform_select_helper(x, args...; wrap_byrow = false, modify = false, selectfun = true)
end
"""
@@ -1974,12 +1946,7 @@ macro select(x, args...)
end
function rselect_helper(x, args...)
- x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = true)
-
- t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs)
- quote
- $select($x, $(t...); $(kw...))
- end
+ generic_transform_select_helper(x, args...; wrap_byrow = true, modify = false, selectfun = true)
end
"""
@@ -2027,12 +1994,7 @@ end
##############################################################################
function select!_helper(x, args...)
- x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false)
-
- t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
- quote
- $select!($x, $(t...); $(kw...))
- end
+ generic_transform_select_helper(x, args...; wrap_byrow = true, modify = true, selectfun = true)
end
"""
@@ -2155,12 +2117,7 @@ macro select!(x, args...)
end
function rselect!_helper(x, args...)
- x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = true)
-
- t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs)
- quote
- $select!($x, $(t...); $(kw...))
- end
+ generic_transform_select_helper(x, args...; wrap_byrow = true, modify = true, selectfun = true)
end
"""
diff --git a/src/parsing.jl b/src/parsing.jl
index 99606f78..2b9a1fd9 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -168,7 +168,7 @@ const BYROW_SYM = Symbol("@byrow")
const PASSMISSING_SYM = Symbol("@passmissing")
const ASTABLE_SYM = Symbol("@astable")
const WHEN_SYM = Symbol("@when")
-const DEFAULT_FLAGS = (;BYROW_SYM => Ref(false), PASSMISSING_SYM => Ref(false), ASTABLE_SYM => Ref(false), WHEN_SYM => Ref(false))
+const DEFAULT_FLAGS = (;BYROW_SYM => Ref(false), PASSMISSING_SYM => Ref(false), ASTABLE_SYM => Ref(false))
extract_macro_flags(ex, exprflags = deepcopy(DEFAULT_FLAGS)) = (ex, exprflags)
function extract_macro_flags(ex::Expr, exprflags = deepcopy(DEFAULT_FLAGS))
@@ -188,6 +188,48 @@ function extract_macro_flags(ex::Expr, exprflags = deepcopy(DEFAULT_FLAGS))
return (ex, exprflags)
end
+"""
+ omit_nested_when(ex::Expr, when = Ref(false))
+
+For a statement of the form `@passmissing @when x`, return `@passmissing x` and
+a flag signifying whether a `@when` statement was present.
+"""
+function omit_nested_when(ex::Expr, when = Ref(false))
+ if ex.head == :macrocall && ex.args[1] in keys(DEFAULT_FLAGS) || is_macro_head(ex, "@when")
+ macroname = ex.args[1]
+ if macroname == Symbol("@when")
+ when[] = true
+ return omit_nested_when(MacroTools.unblock(ex.args[3]), when)
+ else
+ new_expr, when = omit_nested_when(MacroTools.unblock(ex.args[3]), when)
+ ex.args[3] = new_expr
+ end
+ end
+ return ex, when
+end
+omit_nested_when(ex, when = Ref(false)) = ex, when
+
+function get_when_statements(exprs)
+ new_exprs = []
+ when_statements = []
+ seen_non_when = false
+ for expr in exprs
+ e, when = omit_nested_when(expr)
+ if when[]
+ if seen_non_when
+ throw(ArgumentError("All @when statements must come first"))
+ end
+ push!(when_statements, e)
+ else
+ seen_non_when = true
+ push!(new_exprs, expr)
+ end
+ end
+
+ new_exprs, when_statements
+end
+
+
"""
check_macro_flags_consistency(exprflags)
From d9e45674467e50f607af93c00216760599071b77 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Tue, 19 Dec 2023 14:50:09 -0500
Subject: [PATCH 06/15] add with
---
src/macros.jl | 16 ++++++++++++----
src/parsing.jl | 3 +++
2 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/src/macros.jl b/src/macros.jl
index 931dff8d..b56df4fa 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -547,14 +547,23 @@ getsinglecolumn(df, s) = throw(ArgumentError("Only indexing with Symbols, string
"is currently allowed with $DOLLAR"))
function with_helper(d, body)
+ # Get rid of the leading @byrow, @passmissing etc.
+ # but otherwise leave body untouched
body, outer_flags = extract_macro_flags(body)
- if body isa Expr && body.head == :block
- es, whens = get_when_statements(MacroTools.rmlines(body).args)
+ if outer_flags[ASTABLE_SYM][]
+ throw(ArgumentError("@astable macro-flag cannot be used inside of @with"))
end
+ # If we have a begin...end somewhere, we might
+ # have a @when.
+ # Remove the @when statements, recording that they
+ # exist. To do this we also have to de-construct
+ # body into a vector expressions.
+ es, whens = get_when_statements(MacroTools.rmlines(MacroTools.block(body)).args)
+ newbody = MacroTools.block(es...)
# Make body an expression to force the
# complicated method of fun_to_vec
# in the case of QuoteNode
- t = fun_to_vec(Expr(:block, es...); no_dest=true, outer_flags = outer_flags)
+ t = fun_to_vec(newbody; no_dest=true, outer_flags = outer_flags)
if !isempty(whens)
w = (fun_to_vec(ex; no_dest = true, gensym_names=false, outer_flags = outer_flags) for ex in whens)
z = gensym()
@@ -1487,7 +1496,6 @@ function generic_transform_select_helper(x, args...; wrap_byrow::Bool = false, m
end
x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = wrap_byrow)
-
exprs, whens = get_when_statements(exprs)
if !isempty(whens)
w = (fun_to_vec(ex; no_dest = true, gensym_names=false, outer_flags=outer_flags) for ex in whens)
diff --git a/src/parsing.jl b/src/parsing.jl
index 2b9a1fd9..ec436488 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -180,6 +180,9 @@ function extract_macro_flags(ex::Expr, exprflags = deepcopy(DEFAULT_FLAGS))
throw(ArgumentError("Redundant flag $macroname used."))
end
exprflag[] = true
+ if length(ex.args) > 3
+ throw(ArgumentError("Too many arguments passed to $macroname"))
+ end
return extract_macro_flags(MacroTools.unblock(ex.args[3]), exprflags)
else
return (ex, exprflags)
From dc165565275a87762601064e56f86ee24a1d3794 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Tue, 19 Dec 2023 17:22:07 -0500
Subject: [PATCH 07/15] tests
---
src/macros.jl | 23 +--
src/parsing.jl | 14 +-
test/when.jl | 435 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 458 insertions(+), 14 deletions(-)
diff --git a/src/macros.jl b/src/macros.jl
index b56df4fa..860d7b1f 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -558,17 +558,17 @@ function with_helper(d, body)
# Remove the @when statements, recording that they
# exist. To do this we also have to de-construct
# body into a vector expressions.
- es, whens = get_when_statements(MacroTools.rmlines(MacroTools.block(body)).args)
- newbody = MacroTools.block(es...)
+ es, when = get_when_statements(MacroTools.rmlines(MacroTools.block(body)).args)
+ newbody = Expr(:block, es...)
# Make body an expression to force the
# complicated method of fun_to_vec
# in the case of QuoteNode
t = fun_to_vec(newbody; no_dest=true, outer_flags = outer_flags)
- if !isempty(whens)
- w = (fun_to_vec(ex; no_dest = true, gensym_names=false, outer_flags = outer_flags) for ex in whens)
+ if !isnothing(when)
+ w = fun_to_vec(when; no_dest = true, gensym_names=false, outer_flags = outer_flags)
z = gensym()
quote
- $z = $subset($d, $(w...); view = true)
+ $z = $subset($d, $w; view = true, skipmissing = true)
$exec($z, $t)
end
else
@@ -1496,19 +1496,20 @@ function generic_transform_select_helper(x, args...; wrap_byrow::Bool = false, m
end
x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = wrap_byrow)
- exprs, whens = get_when_statements(exprs)
- if !isempty(whens)
- w = (fun_to_vec(ex; no_dest = true, gensym_names=false, outer_flags=outer_flags) for ex in whens)
+ exprs, when = get_when_statements(exprs)
+ # With a @when statement, apply the operation to a view of the matching rows only
+ if !isnothing(when)
+ w = fun_to_vec(when; no_dest = true, gensym_names=false, outer_flags=outer_flags)
t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs)
z = gensym()
if modify
quote
- $z = $subset($x, $(w...); view = true)
+ $z = $subset($x, $w; view = true, skipmissing = true)
$parent($secondstagefun($z, $(t...); $(kw...)))
end
else
quote
- $z = $subset($copy($x), $(w...); view = true)
+ $z = $subset($copy($x), $w; view = true, skipmissing = true)
$parent($secondstagefun($z, $(t...); $(kw...)))
end
end
@@ -2002,7 +2003,7 @@ end
##############################################################################
function select!_helper(x, args...)
- generic_transform_select_helper(x, args...; wrap_byrow = true, modify = true, selectfun = true)
+ generic_transform_select_helper(x, args...; wrap_byrow = false, modify = true, selectfun = true)
end
"""
diff --git a/src/parsing.jl b/src/parsing.jl
index ec436488..8cddf510 100644
--- a/src/parsing.jl
+++ b/src/parsing.jl
@@ -200,6 +200,9 @@ a flag signifying a `@when` statement was present.
function omit_nested_when(ex::Expr, when = Ref(false))
if ex.head == :macrocall && ex.args[1] in keys(DEFAULT_FLAGS) || is_macro_head(ex, "@when")
macroname = ex.args[1]
+ if length(ex.args) > 3
+ throw(ArgumentError("Too many arguments passed to $macroname"))
+ end
if macroname == Symbol("@when")
when[] = true
return omit_nested_when(MacroTools.unblock(ex.args[3]), when)
@@ -214,22 +217,27 @@ omit_nested_when(ex, when = Ref(false)) = ex, when
function get_when_statements(exprs)
new_exprs = []
- when_statements = []
+ when_statement = nothing
seen_non_when = false
+ seen_when = false
for expr in exprs
e, when = omit_nested_when(expr)
if when[]
+ if seen_when
+ throw(ArgumentError("Only one @when statement allowed at a time"))
+ end
if seen_non_when
throw(ArgumentError("All @when statements must come first"))
end
- push!(when_statements, e)
+ seen_when = true
+ when_statement = e
else
seen_non_when = true
push!(new_exprs, expr)
end
end
- new_exprs, when_statements
+ new_exprs, when_statement
end
diff --git a/test/when.jl b/test/when.jl
index 04f69554..083e19c0 100644
--- a/test/when.jl
+++ b/test/when.jl
@@ -7,4 +7,439 @@ using Statistics
const ≅ = isequal
+@testset "@transform when" begin
+ df = DataFrame(a = [1, 2], z = [60, 70])
+ res = DataFrame(a = [1, 2], z = [60, 500], c = [missing, 5])
+ df2 = @transform df begin
+ @when :a .> 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ df2 = @transform(df, @when(:a .> 1), :c = 5, :z = 500)
+ @test df2 ≅ res
+
+ df2 = @transform df @byrow begin
+ @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ df2 = @transform df @byrow @passmissing begin
+ @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ df2 = @transform df begin
+ @byrow @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ df2 = @transform df begin
+ @when @byrow :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+
+ df2 = @transform df begin
+ @when @byrow :a > 1 ? true : missing
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ dfa = copy(df)
+ dfa.a = [missing, 2]
+ df2 = @transform dfa begin
+ @when @passmissing @byrow :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ DataFrame(a = [missing, 2], z = [60, 500], c = [missing, 5])
+end
+
+@testset "@rtransform when" begin
+ df = DataFrame(a = [1, 2], z = [60, 70])
+ res = DataFrame(a = [1, 2], z = [60, 500], c = [missing, 5])
+ df2 = @rtransform df begin
+ @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ df2 = @rtransform(df, @when(:a > 1), :c = 5, :z = 500)
+ @test df2 ≅ res
+
+ df2 = @rtransform df @passmissing begin
+ @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ df2 = @rtransform df begin
+ @when :a > 1 ? true : missing
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ dfa = copy(df)
+ dfa.a = [missing, 2]
+ df2 = @transform dfa begin
+ @when @passmissing @byrow :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ DataFrame(a = [missing, 2], z = [60, 500], c = [missing, 5])
+end
+
+@testset "@transform! when" begin
+ df_orig = DataFrame(a = [1, 2], z = [60, 70])
+ res = DataFrame(a = [1, 2], z = [60, 500], c = [missing, 5])
+ df = copy(df_orig)
+ df2 = @transform! df begin
+ @when :a .> 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ df = copy(df_orig)
+ df2 = @transform!(df, @when(:a .> 1), :c = 5, :z = 500)
+ @test df2 ≅ res
+
+ df = copy(df_orig)
+ df2 = @transform! df @byrow begin
+ @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ df = copy(df_orig)
+ df2 = @transform! df @byrow @passmissing begin
+ @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ df = copy(df_orig)
+ df2 = @transform! df begin
+ @byrow @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ df = copy(df_orig)
+ df2 = @transform! df begin
+ @when @byrow :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ df = copy(df_orig)
+ df2 = @transform! df begin
+ @when @byrow :a > 1 ? true : missing
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ dfa = copy(df_orig)
+ dfa.a = [missing, 2]
+ df = copy(dfa)
+ df2 = @transform! df begin
+ @when @passmissing @byrow :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ DataFrame(a = [missing, 2], z = [60, 500], c = [missing, 5])
+ @test df2 === df
+end
+
+
+@testset "@rtransform! when" begin
+ df_orig = DataFrame(a = [1, 2], z = [60, 70])
+ res = DataFrame(a = [1, 2], z = [60, 500], c = [missing, 5])
+ df = copy(df_orig)
+ df2 = @rtransform! df begin
+ @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ df = copy(df_orig)
+ df2 = @rtransform!(df, @when(:a > 1), :c = 5, :z = 500)
+ @test df2 ≅ res
+
+ df = copy(df_orig)
+ df2 = @rtransform! df begin
+ @when :a > 1 ? true : missing
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ dfa = copy(df_orig)
+ dfa.a = [missing, 2]
+ df = copy(dfa)
+ df2 = @rtransform! df begin
+ @when @passmissing :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ DataFrame(a = [missing, 2], z = [60, 500], c = [missing, 5])
+ @test df2 === df
+end
+
+@testset "@select when" begin
+ df = DataFrame(a = [1, 2], z = [60, 70])
+ res = DataFrame(c = [missing, 5], z = [60, 500])
+ df2 = @select df begin
+ @when :a .> 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ df2 = @select(df, @when(:a .> 1), :c = 5, :z = 500)
+ @test df2 ≅ res
+
+ df2 = @select df @byrow begin
+ @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ df2 = @select df @byrow @passmissing begin
+ @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ df2 = @select df begin
+ @byrow @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ df2 = @select df begin
+ @when @byrow :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+
+ df2 = @select df begin
+ @when @byrow :a > 1 ? true : missing
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ dfa = copy(df)
+ dfa.a = [missing, 2]
+ df2 = @select dfa begin
+ @when @passmissing @byrow :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+end
+
+@testset "@rselect when" begin
+ df = DataFrame(a = [1, 2], z = [60, 70])
+ res = DataFrame(c = [missing, 5], z = [60, 500])
+ df2 = @rselect df begin
+ @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ df2 = @rselect(df, @when(:a > 1), :c = 5, :z = 500)
+ @test df2 ≅ res
+
+ df2 = @rselect df @passmissing begin
+ @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ df2 = @rselect df begin
+ @when :a > 1 ? true : missing
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+
+ dfa = copy(df)
+ dfa.a = [missing, 2]
+ df2 = @select dfa begin
+ @when @passmissing @byrow :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+end
+
+@testset "@select! when" begin
+ df_orig = DataFrame(a = [1, 2], z = [60, 70])
+ res = DataFrame(c = [missing, 5], z = [60, 500])
+ df = copy(df_orig)
+ df2 = @select! df begin
+ @when :a .> 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ df = copy(df_orig)
+ df2 = @select!(df, @when(:a .> 1), :c = 5, :z = 500)
+ @test df2 ≅ res
+
+ df = copy(df_orig)
+ df2 = @select! df @byrow begin
+ @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ df = copy(df_orig)
+ df2 = @select! df @byrow @passmissing begin
+ @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ df = copy(df_orig)
+ df2 = @select! df begin
+ @byrow @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ df = copy(df_orig)
+ df2 = @select! df begin
+ @when @byrow :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ df = copy(df_orig)
+ df2 = @select! df begin
+ @when @byrow :a > 1 ? true : missing
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ dfa = copy(df_orig)
+ dfa.a = [missing, 2]
+ df = copy(dfa)
+ df2 = @select! df begin
+ @when @passmissing @byrow :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+end
+
+
+@testset "@rselect! when" begin
+ df_orig = DataFrame(a = [1, 2], z = [60, 70])
+ res = DataFrame(c = [missing, 5], z = [60, 500])
+ df = copy(df_orig)
+ df2 = @rselect! df begin
+ @when :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ df = copy(df_orig)
+ df2 = @rselect!(df, @when(:a > 1), :c = 5, :z = 500)
+ @test df2 ≅ res
+
+ df = copy(df_orig)
+ df2 = @rselect! df begin
+ @when :a > 1 ? true : missing
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+
+ dfa = copy(df_orig)
+ dfa.a = [missing, 2]
+ df = copy(dfa)
+ df2 = @rselect! df begin
+ @when @passmissing :a > 1
+ :c = 5
+ :z = 500
+ end
+ @test df2 ≅ res
+ @test df2 === df
+end
+
+@testset "@when many conditions" begin
+ df = DataFrame(a = [1, missing, 3, 4], z = [50, 60, 70, 80])
+ @transform df begin
+ @when :a .> 1
+ @when :a .> 2
+ :c = 5
+ end
+
+end
+
+
+@testset "@with when" begin
+ df = DataFrame(a = [1, 2], z = [60, 70])
+
+ t = @with df begin
+ @when :a .> 1
+ :z
+ end
+ @test t === view(df.z, 2:2)
+end
+
end # module
\ No newline at end of file
From 4bec3c0f66660bc6c0c059c1bf71201a4715143c Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Tue, 19 Dec 2023 17:29:15 -0500
Subject: [PATCH 08/15] more tests
---
test/when.jl | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/test/when.jl b/test/when.jl
index 083e19c0..1a3e238a 100644
--- a/test/when.jl
+++ b/test/when.jl
@@ -421,14 +421,28 @@ end
@test df2 === df
end
-@testset "@when many conditions" begin
- df = DataFrame(a = [1, missing, 3, 4], z = [50, 60, 70, 80])
- @transform df begin
+df = DataFrame(a = [1, missing, 3, 4], z = [50, 60, 70, 80])
+@testset "@when errors" begin
+ @test_throws LoadError @eval @transform df begin
@when :a .> 1
@when :a .> 2
:c = 5
end
+ @test_throws LoadError @eval @transform df @when(:a .== 1) begin
+ :c = 1
+ :b = 2
+ end
+
+ @test_throws LoadError @eval @transform df @byrow @when(:a == 1) begin
+ :c = 1
+ :b = 2
+ end
+
+ @test_throws LoadError @eval @transform df @when(:a == 1) @byrow begin
+ :c = 1
+ :b = 2
+ end
end
From 9db9123bfc5bb2d3391e8ca2a0e1e230929f25e9 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Tue, 19 Dec 2023 17:46:20 -0500
Subject: [PATCH 09/15] add tests for with
---
test/when.jl | 31 +++++++++++++++++++++++++++++--
1 file changed, 29 insertions(+), 2 deletions(-)
diff --git a/test/when.jl b/test/when.jl
index 1a3e238a..dac0ff4e 100644
--- a/test/when.jl
+++ b/test/when.jl
@@ -443,17 +443,44 @@ df = DataFrame(a = [1, missing, 3, 4], z = [50, 60, 70, 80])
:c = 1
:b = 2
end
-end
+
+ @test_throws LoadError @eval @transform df @astable begin
+ @when :x == 1
+ :z = 1
+ end
+ @test_throws LoadError @eval @with df @when(:a == 1) begin
+ first(:z)
+ end
+end
+
@testset "@with when" begin
- df = DataFrame(a = [1, 2], z = [60, 70])
+ df = DataFrame(a = [missing, 2], z = [60, 70])
t = @with df begin
@when :a .> 1
:z
end
@test t === view(df.z, 2:2)
+
+ t = @with df @byrow begin
+ @when :a > 1
+ first(:z)
+ end
+ @test t == [70]
+
+ t = @with df begin
+ @when @byrow :a > 1
+ first(:z)
+ end
+ @test t == 70
+
+ t = @with df begin
+ @when @byrow @passmissing :a > 1 && true
+ first(:z)
+ end
+ @test t == 70
end
end # module
\ No newline at end of file
From 8807032392eedf8d07bb637e2d46c4054fa24a94 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Wed, 20 Dec 2023 10:48:25 -0500
Subject: [PATCH 10/15] progress on docs
---
docs/src/index.md | 129 +++++++++++++++++++++++++++++++++++++++++-
src/DataFramesMeta.jl | 2 +-
src/macros.jl | 77 +++++++++++++++++++++++++
3 files changed, 206 insertions(+), 2 deletions(-)
diff --git a/docs/src/index.md b/docs/src/index.md
index 062aaee2..f8eb2c46 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -24,6 +24,7 @@ In addition, DataFramesMeta provides
* `@byrow` for applying functions to each row of a data frame (only supported inside other macros).
* `@passmissing` for propagating missing values inside row-wise DataFramesMeta.jl transformations.
* `@astable` to create multiple columns within a single transformation.
+* `@when` to non-destructively work with a subset of observations (similar to Stata's `if`).
* `@chain`, from [Chain.jl](https://github.com/jkrumbiegel/Chain.jl) for piping the above macros together, similar to [magrittr](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html)'s
`%>%` in R.
@@ -47,7 +48,133 @@ Use `passmissing` to propagate `missing` values more easily. See `?passmissing`
details. `passmissing` is defined in [Missings.jl](https://github.com/JuliaData/Missings.jl)
but exported by DataFramesMeta for convenience.
-# Provided macros
+# Provided macrosp = graph_mean_outp = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin", p = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+p = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+p = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+p = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+p = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+p = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+p = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+p = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+p = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+p = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+p = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+p = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+p = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+cp = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+p = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+op = graph_mean_outcomes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+mes_bins(
+ "dev_status",
+ "income_pctile",
+ "distance_bin",
+ "hilly",
+ "pop_weight_country",
+ long_shapes)
+
!!! note
diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl
index a16cca5a..a96916d7 100644
--- a/src/DataFramesMeta.jl
+++ b/src/DataFramesMeta.jl
@@ -20,7 +20,7 @@ export @with,
@rtransform, @rselect, @rtransform!, @rselect!,
@distinct, @rdistinct, @distinct!, @rdistinct!,
@eachrow, @eachrow!,
- @byrow, @passmissing, @astable, @kwarg,
+ @byrow, @passmissing, @astable, @kwarg, @when,
@based_on, @where # deprecated
const DOLLAR = raw"$"
diff --git a/src/macros.jl b/src/macros.jl
index 860d7b1f..55e2a175 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -685,6 +685,83 @@ macro with(d, body)
esc(with_helper(d, body))
end
+"""
+ @when(args...)
+
+Perform operations on a subset of `df`, but still
+return a data frame with the same number of rows as `df`. `@when` can be used
+with the `@transform` macros, `@select` macros, and `@with`.
+
+`@when` is not a "real" macro. It is only functional inside DataFramesMeta.jl macros.
+A motivating example:
+
+```
+@transform df begin
+    @when :a .== 1
+    :y = :y .- mean(:y)
+end
+```
+
+The above block generates the column `:y` which is de-meaned with respect to observations where
+`:a == 1`. If `:y` already exists in `df`, then new values over-write old values only
+when `:a == 1`. If `:y` does not already exist in `df`, then new values are written
+when `:a == 1`, and remaining values are filled with `missing`.
+
+Only one `@when` statement is allowed per transformation macro and it must be the
+first argument in the transformation.
+
+`@when` inherits `@byrow` and `@passmissing` from the transformation. As an example:
+
+```
+@transform df @byrow begin
+ @when :a == 1
+ ...
+end
+```
+
+In the above, the condition inside `@when` operates row-wise. However, `@byrow` and `@passmissing` can
+also be passed independently, such as `@byrow @when :a == 1`.
+
+Like `@subset`, `@when` drops rows where `missing` values are returned. Unlike `@subset`,
+there is currently no way to control this behavior.
+
+## Details
+
+`@when` operates by calling `subset` with the `view = true` keyword argument,
+followed by a `transform!` call. See `?transform!` for more details. Roughly,
+the expression
+
+```
+@transform df begin
+ @when :a .== 1
+ :y = 5
+end
+```
+
+translates to
+
+```
+df1 = @subset(copy(df), :a .== 1; view = true)
+df2 = @transform! df1 :y = 5
+parent(df2)
+```
+
+Unlike the other macro-flags, such as `@passmissing` and `@byrow`, `@when` cannot be
+used at the top-level. For example,
+```
+@transform df @byrow @when(:a == 1) begin
+ :x = 1
+ :y = 2
+end
+```
+is not supported.
+
+"""
+macro when(args...) # marker flag only: expanding it outside a DataFramesMeta macro is an error
+    throw(ArgumentError("@when only works inside DataFramesMeta macros."))
+end
+
+
ASTABLE_RHS_ORDERBY_DOCS = """
In operations, it is also allowed to use `AsTable(cols)` to work with
multiple columns at once, where the columns are grouped together in a
From 2c0e1c2fda08e29f18b3342854b44c654681fbe7 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 21 Dec 2023 10:48:17 -0500
Subject: [PATCH 11/15] fix diff
---
docs/src/index.md | 128 +---------------------------------------------
1 file changed, 1 insertion(+), 127 deletions(-)
diff --git a/docs/src/index.md b/docs/src/index.md
index f8eb2c46..6b176f79 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -48,133 +48,7 @@ Use `passmissing` to propagate `missing` values more easily. See `?passmissing`
details. `passmissing` is defined in [Missings.jl](https://github.com/JuliaData/Missings.jl)
but exported by DataFramesMeta for convenience.
-# Provided macrosp = graph_mean_outp = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin", p = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-
- "hilly",
- "pop_weight_country",
- long_shapes)
-p = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-p = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-p = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-p = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-p = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-p = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-p = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-p = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-p = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-p = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-p = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-p = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-cp = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-p = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-op = graph_mean_outcomes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-mes_bins(
- "dev_status",
- "income_pctile",
- "distance_bin",
- "hilly",
- "pop_weight_country",
- long_shapes)
-
+# Provided macros
!!! note
From 58cb68ad94bfac9f028fe2c5d49de21b77981d12 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 21 Dec 2023 15:52:54 -0500
Subject: [PATCH 12/15] add when.jl to tests
---
test/runtests.jl | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/test/runtests.jl b/test/runtests.jl
index 3cb43e93..5af4ce06 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -15,7 +15,8 @@ my_tests = ["dataframes.jl",
"byrow.jl",
"astable.jl",
"astable_flag.jl",
- "passmissing.jl"]
+ "passmissing.jl",
+ "when.jl"]
println("Running tests:")
From ff32e7fbcbdf514cdc921453c7a97a70c8aa235f Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 28 Mar 2024 11:50:47 -0400
Subject: [PATCH 13/15] merge in master again
---
src/DataFramesMeta.jl | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl
index 4e96c4c9..330f7c6a 100644
--- a/src/DataFramesMeta.jl
+++ b/src/DataFramesMeta.jl
@@ -24,13 +24,9 @@ export @with,
@rtransform, @rselect, @rtransform!, @rselect!,
@distinct, @rdistinct, @distinct!, @rdistinct!,
@eachrow, @eachrow!,
-<<<<<<< HEAD
- @byrow, @passmissing, @astable, @kwarg, @when,
-=======
- @byrow, @passmissing, @astable, @kwarg,
+ @byrow, @passmissing, @astable, @kwarg, @when
@label!, @note!, printlabels, printnotes,
@groupby,
->>>>>>> master
@based_on, @where # deprecated
const DOLLAR = raw"$"
From f166cb2ddf0a7a2895930e5179ee030c791f228d Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Thu, 28 Mar 2024 12:02:42 -0400
Subject: [PATCH 14/15] more merge fixes
---
src/DataFramesMeta.jl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl
index 330f7c6a..553b5fa5 100644
--- a/src/DataFramesMeta.jl
+++ b/src/DataFramesMeta.jl
@@ -24,7 +24,7 @@ export @with,
@rtransform, @rselect, @rtransform!, @rselect!,
@distinct, @rdistinct, @distinct!, @rdistinct!,
@eachrow, @eachrow!,
- @byrow, @passmissing, @astable, @kwarg, @when
+ @byrow, @passmissing, @astable, @kwarg, @when,
@label!, @note!, printlabels, printnotes,
@groupby,
@based_on, @where # deprecated
From 2e1692bedd93896fb4441acaf1d6aa8ffa01da43 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Fri, 29 Mar 2024 10:57:17 -0400
Subject: [PATCH 15/15] handle grouped data frames
---
src/macros.jl | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/macros.jl b/src/macros.jl
index 409b2041..d3baae33 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -1555,6 +1555,8 @@ end
## transform & @transform
##
##############################################################################
+copy_gd(x::GroupedDataFrame) = transform(x; ungroup = false) # stays grouped; presumably copies the parent — confirm
+copy_gd(x::AbstractDataFrame) = copy(x) # plain data frames are copied directly
function generic_transform_select_helper(x, args...; wrap_byrow::Bool = false, modify::Bool = false, selectfun::Bool = false)
if selectfun
secondstagefun = select!
@@ -1586,7 +1588,7 @@ function generic_transform_select_helper(x, args...; wrap_byrow::Bool = false, m
end
else
quote
- $z = $subset($copy($x), $w; view = true, skipmissing = true)
+ $z = $subset($copy_gd($x), $w; view = true, skipmissing = true)
$parent($secondstagefun($z, $(t...); $(kw...)))
end
end