Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change AST for iterations to use iteration kind #433

Merged
merged 1 commit into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions docs/src/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class of tokenization errors and lets the parser deal with them.
* Using `try catch else finally end` is parsed with `K"catch"` `K"else"` and `K"finally"` children to avoid the awkwardness of the optional child nodes in the `Expr` representation (#234)
* The dotted import path syntax as in `import A.b.c` is parsed with a `K"importpath"` kind rather than `K"."`, because a bare `A.b.c` has a very different nested/quoted expression representation (#244)
* We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220)
* Multiple iterations within the header of a `for`, as in `for a=as, b=bs body end` are represented with a `cartesian_iterator` head rather than a `block`, as these lists of iterators are neither semantically nor syntactically a sequence of statements. Unlike other uses of `block` (see also generators).
* Iterations are represented with the `iteration` and `in` heads rather than `=` within the header of a `for`. Thus `for i=is ; body end` parses to `(for (iteration (in i is)) (block body))`. Cartesian iteration as in `for a=as, b=bs body end` is represented with a longer `iteration` block, `(iteration (in a as) (in b bs))`, rather than a `block` containing `=`, because these lists of iterators are neither semantically nor syntactically a sequence of statements, unlike other uses of `block`. Generators also use the `iteration` head - see information on that below.

## More detail on tree differences

Expand All @@ -90,8 +90,10 @@ mean

```
for x in xs
for y in ys
push!(xy, collection)
for y in ys
push!(xy, collection)
end
end
```

so the `xy` prefix is in the *body* of the innermost for loop. Following this,
Expand All @@ -112,27 +114,25 @@ source order.

However, our green tree is strictly source-ordered, so we must deviate from the
Julia AST. We deal with this by grouping cartesian products of iterators
(separated by commas) within `cartesian_iterator` blocks as in `for` loops, and
use the presence of multiple iterator blocks rather than the `flatten` head to
(separated by commas) within `iteration` blocks as in `for` loops, and
use the presence of multiple `iteration` blocks rather than the `flatten` head to
distinguish flattened iterators. The nested flattens and generators of `Expr`
forms are reconstructed later. In this form the tree structure resembles the
source much more closely. For example, `(xy for x in xs for y in ys)` is parsed as

```
(generator
xy
(= x xs)
(= y ys))
(iteration (in x xs))
(iteration (in y ys)))
```

And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as

```
(generator
xy
(cartesian_iterator
(= x xs)
(= y ys)))
(iteration (in x xs) (in y ys)))
```

### Whitespace trivia inside strings
Expand Down
34 changes: 16 additions & 18 deletions src/expr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,17 @@ function _extract_do_lambda!(args)
end
end

# Append the iteration specification `ex` to `args` and return `args`.
#
# When `ex` is an `:iteration` expression, each of its children is rebuilt as
# an `=` expression carrying that child's own arguments and pushed onto `args`
# in order. Any other `ex` is pushed onto `args` unchanged.
function _append_iterspec!(args, ex)
    # Guard clause: anything that is not an `:iteration` node passes through.
    if !@isexpr(ex, :iteration)
        push!(args, ex)
        return args
    end
    for spec in ex.args::Vector{Any}
        push!(args, Expr(:(=), spec.args...))
    end
    return args
end

# Convert internal node of the JuliaSyntax parse tree to an Expr
function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args)
k = kind(head)
Expand Down Expand Up @@ -301,10 +312,8 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
# Move parameters blocks to args[2]
_reorder_parameters!(args, 2)
elseif k == K"for"
a1 = args[1]
if @isexpr(a1, :cartesian_iterator)
args[1] = Expr(:block, a1.args...)
end
iters = _append_iterspec!([], args[1])
args[1] = length(iters) == 1 ? only(iters) : Expr(:block, iters...)
# Add extra line number node for the `end` of the block. This may seem
# useless but it affects code coverage.
push!(args[2].args, endloc)
Expand Down Expand Up @@ -360,12 +369,8 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
# source-ordered `generator` format.
gen = args[1]
for j = length(args):-1:2
aj = args[j]
if @isexpr(aj, :cartesian_iterator)
gen = Expr(:generator, gen, aj.args...)
else
gen = Expr(:generator, gen, aj)
end
gen = Expr(:generator, gen)
_append_iterspec!(gen.args, args[j])
if j < length(args)
# Additional `for`s flatten the inner generator
gen = Expr(:flatten, gen)
Expand All @@ -374,14 +379,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
return gen
elseif k == K"filter"
@assert length(args) == 2
iterspec = args[1]
outargs = Any[args[2]]
if @isexpr(iterspec, :cartesian_iterator)
append!(outargs, iterspec.args)
else
push!(outargs, iterspec)
end
args = outargs
args = _append_iterspec!(Any[args[2]], args[1])
elseif k == K"nrow" || k == K"ncat"
# For lack of a better place, the dimension argument to nrow/ncat
# is stored in the flags
Expand Down
2 changes: 1 addition & 1 deletion src/kinds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1099,7 +1099,7 @@ register_kinds!(JuliaSyntax, 0, [
# Comprehensions
"generator"
"filter"
"cartesian_iterator"
"iteration"
"comprehension"
"typed_comprehension"
# Container for a single statement/atom plus any trivia and errors
Expand Down
43 changes: 20 additions & 23 deletions src/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1075,7 +1075,7 @@ function parse_where_chain(ps0::ParseState, mark)
# x where {T,S} ==> (where x (braces T S))
# Also various nonsensical forms permitted
# x where {T S} ==> (where x (bracescat (row T S)))
# x where {y for y in ys} ==> (where x (braces (generator y (= y ys))))
# x where {y for y in ys} ==> (where x (braces (generator y (iteration (in y ys)))))
m = position(ps)
bump(ps, TRIVIA_FLAG)
ckind, cflags = parse_cat(ps, K"}", ps.end_symbol)
Expand Down Expand Up @@ -1578,7 +1578,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
# T[x y] ==> (typed_hcat T x y)
# T[x ; y] ==> (typed_vcat T x y)
# T[a b; c d] ==> (typed_vcat T (row a b) (row c d))
# T[x for x in xs] ==> (typed_comprehension T (generator x (= x xs)))
# T[x for x in xs] ==> (typed_comprehension T (generator x (iteration (in x xs))))
#v1.8: T[a ; b ;; c ; d] ==> (typed_ncat-2 T (nrow-1 a b) (nrow-1 c d))
outk = ckind == K"vect" ? K"ref" :
ckind == K"hcat" ? K"typed_hcat" :
Expand Down Expand Up @@ -1798,8 +1798,8 @@ function parse_resword(ps::ParseState)
bump_closing_token(ps, K"end")
emit(ps, mark, K"while")
elseif word == K"for"
# for x in xs end ==> (for (= x xs) (block))
# for x in xs, y in ys \n a \n end ==> (for (cartesian_iterator (= x xs) (= y ys)) (block a))
# for x in xs end ==> (for (iteration (in x xs)) (block))
# for x in xs, y in ys \n a \n end ==> (for (iteration (in x xs) (in y ys)) (block a))
bump(ps, TRIVIA_FLAG)
parse_iteration_specs(ps)
parse_block(ps)
Expand Down Expand Up @@ -2621,11 +2621,11 @@ function parse_iteration_spec(ps::ParseState)
if peek_behind(ps).orig_kind == K"outer"
if peek_skip_newline_in_gen(ps) in KSet"= in ∈"
# Not outer keyword
# outer = rhs ==> (= outer rhs)
# outer <| x = rhs ==> (= (call-i outer <| x) rhs)
# outer = rhs ==> (iteration (in outer rhs))
# outer <| x = rhs ==> (iteration (in (call-i outer <| x) rhs))
else
# outer i = rhs ==> (= (outer i) rhs)
# outer (x,y) = rhs ==> (= (outer (tuple-p x y)) rhs)
# outer i = rhs ==> (iteration (in (outer i) rhs))
# outer (x,y) = rhs ==> (iteration (in (outer (tuple-p x y)) rhs))
reset_node!(ps, position(ps), kind=K"outer", flags=TRIVIA_FLAG)
parse_pipe_lt(ps)
emit(ps, mark, K"outer")
Expand All @@ -2641,17 +2641,15 @@ function parse_iteration_spec(ps::ParseState)
end
# Or try parse_pipe_lt ???
end
emit(ps, mark, K"=")
emit(ps, mark, K"in")
end

# Parse an iteration spec, or a comma-separated list of such, for `for` loops
# and generators
function parse_iteration_specs(ps::ParseState)
mark = position(ps)
n_iters = parse_comma_separated(ps, parse_iteration_spec)
if n_iters > 1
emit(ps, mark, K"cartesian_iterator")
end
emit(ps, mark, K"iteration")
end

# flisp: parse-space-separated-exprs
Expand Down Expand Up @@ -2701,27 +2699,27 @@ end
# Parse generators
#
# We represent generators quite differently from `Expr`:
# * Cartesian products of iterators are grouped within cartesian_iterator
# * Iteration variables and their iterators are grouped within K"iteration"
# nodes, as in the short form of `for` loops.
# * The `generator` kind is used for both cartesian and flattened generators
#
# (x for a in as for b in bs) ==> (parens (generator x (= a as) (= b bs)))
# (x for a in as, b in bs) ==> (parens (generator x (cartesian_iterator (= a as) (= b bs))))
# (x for a in as, b in bs if z) ==> (parens (generator x (filter (cartesian_iterator (= a as) (= b bs)) z)))
# (x for a in as for b in bs) ==> (parens (generator x (iteration (in a as)) (iteration (in b bs))))
# (x for a in as, b in bs) ==> (parens (generator x (iteration (in a as) (in b bs))))
# (x for a in as, b in bs if z) ==> (parens (generator x (filter (iteration (in a as) (in b bs)) z)))
#
# flisp: parse-generator
function parse_generator(ps::ParseState, mark)
while (t = peek_token(ps); kind(t) == K"for")
if !preceding_whitespace(t)
# ((x)for x in xs) ==> (parens (generator (parens x) (error) (= x xs)))
# ((x)for x in xs) ==> (parens (generator (parens x) (error) (iteration (in x xs))))
bump_invisible(ps, K"error", TRIVIA_FLAG,
error="Expected space before `for` in generator")
end
bump(ps, TRIVIA_FLAG)
iter_mark = position(ps)
parse_iteration_specs(ps)
if peek(ps) == K"if"
# (x for a in as if z) ==> (parens (generator x (filter (= a as) z)))
# (x for a in as if z) ==> (parens (generator x (filter (iteration (in a as)) z)))
bump(ps, TRIVIA_FLAG)
parse_cond(ps)
emit(ps, iter_mark, K"filter")
Expand All @@ -2732,7 +2730,7 @@ end

# flisp: parse-comprehension
function parse_comprehension(ps::ParseState, mark, closer)
# [x for a in as] ==> (comprehension (generator x a in as))
# [x for a in as] ==> (comprehension (generator x (iteration (in a as))))
ps = ParseState(ps, whitespace_newline=true,
space_sensitive=false,
end_symbol=false)
Expand Down Expand Up @@ -2982,8 +2980,8 @@ function parse_cat(ps::ParseState, closer, end_is_symbol)
# [x ==> (vect x (error-t))
parse_vect(ps, closer)
elseif k == K"for"
# [x for a in as] ==> (comprehension (generator x (= a as)))
# [x \n\n for a in as] ==> (comprehension (generator x (= a as)))
# [x for a in as] ==> (comprehension (generator x (iteration (in a as))))
# [x \n\n for a in as] ==> (comprehension (generator x (iteration (in a as))))
parse_comprehension(ps, mark, closer)
else
# [x y] ==> (hcat x y)
Expand Down Expand Up @@ -3139,8 +3137,7 @@ function parse_brackets(after_parse::Function,
continue
elseif k == K"for"
# Generator syntax
# (x for a in as) ==> (parens (generator x (= a as)))
# (x \n\n for a in as) ==> (parens (generator x (= a as)))
# (x for a in as) ==> (parens (generator x (iteration (in a as))))
parse_generator(ps, mark)
else
# Error - recovery done when consuming closing_kind
Expand Down
62 changes: 31 additions & 31 deletions test/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ tests = [
"x where \n {T}" => "(where x (braces T))"
"x where {T,S}" => "(where x (braces T S))"
"x where {T S}" => "(where x (bracescat (row T S)))"
"x where {y for y in ys}" => "(where x (braces (generator y (= y ys))))"
"x where {y for y in ys}" => "(where x (braces (generator y (iteration (in y ys)))))"
"x where T" => "(where x T)"
"x where \n T" => "(where x T)"
"x where T<:S" => "(where x (<: T S))"
Expand Down Expand Up @@ -389,7 +389,7 @@ tests = [
"T[x y]" => "(typed_hcat T x y)"
"T[x ; y]" => "(typed_vcat T x y)"
"T[a b; c d]" => "(typed_vcat T (row a b) (row c d))"
"T[x for x in xs]" => "(typed_comprehension T (generator x (= x xs)))"
"T[x for x in xs]" => "(typed_comprehension T (generator x (iteration (in x xs))))"
((v=v"1.8",), "T[a ; b ;; c ; d]") => "(typed_ncat-2 T (nrow-1 a b) (nrow-1 c d))"

# Dotted forms
Expand Down Expand Up @@ -461,8 +461,8 @@ tests = [
"while cond body end" => "(while cond (block body))"
"while x < y \n a \n b \n end" => "(while (call-i x < y) (block a b))"
# for
"for x in xs end" => "(for (= x xs) (block))"
"for x in xs, y in ys \n a \n end" => "(for (cartesian_iterator (= x xs) (= y ys)) (block a))"
"for x in xs end" => "(for (iteration (in x xs)) (block))"
"for x in xs, y in ys \n a \n end" => "(for (iteration (in x xs) (in y ys)) (block a))"
# let
"let x=1\n end" => "(let (block (= x 1)) (block))"
"let x=1 ; end" => "(let (block (= x 1)) (block))"
Expand Down Expand Up @@ -670,16 +670,16 @@ tests = [
"import A..." => "(import (importpath A ..))"
"import A; B" => "(import (importpath A))"
],
JuliaSyntax.parse_iteration_spec => [
"i = rhs" => "(= i rhs)"
"i in rhs" => "(= i rhs)"
"i ∈ rhs" => "(= i rhs)"
"i = 1:10" => "(= i (call-i 1 : 10))"
"(i,j) in iter" => "(= (tuple-p i j) iter)"
"outer = rhs" => "(= outer rhs)"
"outer <| x = rhs" => "(= (call-i outer <| x) rhs)"
"outer i = rhs" => "(= (outer i) rhs)"
"outer (x,y) = rhs" => "(= (outer (tuple-p x y)) rhs)"
JuliaSyntax.parse_iteration_specs => [
"i = rhs" => "(iteration (in i rhs))"
"i in rhs" => "(iteration (in i rhs))"
"i ∈ rhs" => "(iteration (in i rhs))"
"i = 1:10" => "(iteration (in i (call-i 1 : 10)))"
"(i,j) in iter" => "(iteration (in (tuple-p i j) iter))"
"outer = rhs" => "(iteration (in outer rhs))"
"outer <| x = rhs" => "(iteration (in (call-i outer <| x) rhs))"
"outer i = rhs" => "(iteration (in (outer i) rhs))"
"outer (x,y) = rhs" => "(iteration (in (outer (tuple-p x y)) rhs))"
],
JuliaSyntax.parse_paren => [
# Tuple syntax with commas
Expand Down Expand Up @@ -707,8 +707,8 @@ tests = [
"(x)" => "(parens x)"
"(a...)" => "(parens (... a))"
# Generators
"(x for a in as)" => "(parens (generator x (= a as)))"
"(x \n\n for a in as)" => "(parens (generator x (= a as)))"
"(x for a in as)" => "(parens (generator x (iteration (in a as))))"
"(x \n\n for a in as)" => "(parens (generator x (iteration (in a as))))"
# Range parsing in parens
"(1:\n2)" => "(parens (call-i 1 : 2))"
"(1:2)" => "(parens (call-i 1 : 2))"
Expand Down Expand Up @@ -776,19 +776,19 @@ tests = [
"[x \n, ]" => "(vect x)"
"[x" => "(vect x (error-t))"
"[x \n\n ]" => "(vect x)"
"[x for a in as]" => "(comprehension (generator x (= a as)))"
"[x \n\n for a in as]" => "(comprehension (generator x (= a as)))"
"[x for a in as]" => "(comprehension (generator x (iteration (in a as))))"
"[x \n\n for a in as]" => "(comprehension (generator x (iteration (in a as))))"
# parse_generator
"(x for a in as for b in bs)" => "(parens (generator x (= a as) (= b bs)))"
"(x for a in as, b in bs)" => "(parens (generator x (cartesian_iterator (= a as) (= b bs))))"
"(x for a in as, b in bs if z)" => "(parens (generator x (filter (cartesian_iterator (= a as) (= b bs)) z)))"
"(x for a in as, b in bs for c in cs, d in ds)" => "(parens (generator x (cartesian_iterator (= a as) (= b bs)) (cartesian_iterator (= c cs) (= d ds))))"
"(x for a in as for b in bs if z)" => "(parens (generator x (= a as) (filter (= b bs) z)))"
"(x for a in as if z for b in bs)" => "(parens (generator x (filter (= a as) z) (= b bs)))"
"[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (generator x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2)))"
"[x for a = as if begin cond2 end]" => "(comprehension (generator x (filter (= a as) (block cond2))))"
"[(x)for x in xs]" => "(comprehension (generator (parens x) (error-t) (= x xs)))"
"(x for a in as if z)" => "(parens (generator x (filter (= a as) z)))"
"(x for a in as for b in bs)" => "(parens (generator x (iteration (in a as)) (iteration (in b bs))))"
"(x for a in as, b in bs)" => "(parens (generator x (iteration (in a as) (in b bs))))"
"(x for a in as, b in bs if z)" => "(parens (generator x (filter (iteration (in a as) (in b bs)) z)))"
"(x for a in as, b in bs for c in cs, d in ds)" => "(parens (generator x (iteration (in a as) (in b bs)) (iteration (in c cs) (in d ds))))"
"(x for a in as for b in bs if z)" => "(parens (generator x (iteration (in a as)) (filter (iteration (in b bs)) z)))"
"(x for a in as if z for b in bs)" => "(parens (generator x (filter (iteration (in a as)) z) (iteration (in b bs))))"
"[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (generator x (iteration (in a as)) (filter (iteration (in b bs)) cond1) (filter (iteration (in c cs)) cond2)))"
"[x for a = as if begin cond2 end]" => "(comprehension (generator x (filter (iteration (in a as)) (block cond2))))"
"[(x)for x in xs]" => "(comprehension (generator (parens x) (error-t) (iteration (in x xs))))"
"(x for a in as if z)" => "(parens (generator x (filter (iteration (in a as)) z)))"
# parse_vect
"[x, y]" => "(vect x y)"
"[x, y]" => "(vect x y)"
Expand Down Expand Up @@ -876,8 +876,8 @@ tests = [
"\"hi\$(\"ho\")\"" => "(string \"hi\" (parens (string \"ho\")))"
"\"\$(x,y)\"" => "(string (parens (error x y)))"
"\"\$(x;y)\"" => "(string (parens (error x y)))"
"\"\$(x for y in z)\"" => "(string (parens (error (generator x (= y z)))))"
"\"\$((x for y in z))\"" => "(string (parens (parens (generator x (= y z)))))"
"\"\$(x for y in z)\"" => "(string (parens (error (generator x (iteration (in y z))))))"
"\"\$((x for y in z))\"" => "(string (parens (parens (generator x (iteration (in y z))))))"
"\"\$(xs...)\"" => "(string (parens (... xs)))"
"\"a \$foo b\"" => "(string \"a \" foo \" b\")"
"\"\$var\"" => "(string var)"
Expand Down Expand Up @@ -996,7 +996,7 @@ parsestmt_test_specs = [
":+'y'" => "(juxtapose (call-post (quote-: +) ') (call-post y '))"
# unary subtype ops and newlines
"a +\n\n<:" => "(call-i a + <:)"
"for\n\n<:" => "(for (= <: (error (error-t))) (block (error)) (error-t))"
"for\n\n<:" => "(for (iteration (in <: (error (error-t)))) (block (error)) (error-t))"
# Empty character consumes trailing ' delimiter (ideally this could be
# tested above but we don't require the input stream to be consumed in the
# unit tests there.
Expand Down
Loading