Skip to content

Commit

Permalink
Lift restrictions for matching of binaries and maps
Browse files Browse the repository at this point in the history
There has always been an implementation limitation for matching
of binaries (for technical reasons). For example:

    foo(Bin) ->
        <<A:8>> = <<X:4,Y:4>> = Bin,
        {A,X,Y}.

This would fail to compile with the following message:

    t.erl:5:5: binary patterns cannot be matched in parallel using '='
    %    5|     <<A:8>> = <<X:4,Y:4>> = Bin,
    %     |     ^

This commit lifts this restriction, making the example legal.

A restriction for map matching is also lifted, but before we can
describe that, we'll need a digression to talk about the `=` operator.

The `=` operator can be used for two similar but slightly different
purposes.

When used in a pattern, for example in a function head, both the
left-hand and right-hand side operands must be patterns:

    Pattern1 = Pattern2

For example:

    bar(#{a := A} = #{b := B}) -> {A, B}.

The following example will not compile because the right-hand side
is not a pattern but an expression:

    wrong(#{a := A} = #{b => B}) -> {A, B}.

    t.erl:4:23: illegal pattern
    %    4| wrong(#{a := A} = #{b => B}) -> {A, B}.
    %     |                       ^

Used in this context, the `=` operator does not imply that the two
patterns are matched in any particular order. Attempting to use a
variable matched out on the left-hand side on the right-hand side, or
vice versa, will fail:

    also_wrong1(#{B := A} = #{b := B}) -> {A,B}.
    also_wrong2(#{a := A} = #{A := B}) -> {A,B}.

    t.erl:6:15: variable 'B' is unbound
    %    6| also_wrong1(#{B := A} = #{b := B}) -> {A,B}.
    %     |               ^

    t.erl:7:27: variable 'A' is unbound
    %    7| also_wrong2(#{a := A} = #{A := B}) -> {A,B}.
    %     |                           ^

The other way to use `=` is in a function body. Used in this way,
the right-hand side must be an expression:

    Pattern = Expression

For example:

    foobar(Value) ->
        #{a := A} = #{a => Value},
        A.

Used in this context, the right-hand side of `=` must **not** be a pattern:

    illegal_foobar(Value) ->
        #{a := A} = #{a := Value},
        A.

    t.erl:18:21: only association operators '=>' are allowed in map construction
    %   18|     #{a := A} = #{a := Value},
    %     |                     ^

When used in a body context, the value of the `=` operator is the
value of its right-hand side operand. When multiple `=` operators are
combined, they are evaluated from right to left. That means that any
number of patterns can be matched at once:

    Pattern1 = Pattern2 = ... = PatternN = Expr

Given that there is a well-defined evaluation order, one would expect
that the following example would be legal:

    baz(M) ->
        #{K := V} = #{k := K} = M,
        V.

It is not. In Erlang/OTP 25 or earlier, the compilation fails with the
following message:

    t.erl:28:7: variable 'K' is unbound
    %   28|     #{K := V} = #{k := K} = M,
    %     |       ^

That restriction is now lifted, making the example legal.

Closes erlang#6348
  • Loading branch information
bjorng committed Nov 2, 2022
1 parent c00c436 commit ec6c392
Show file tree
Hide file tree
Showing 8 changed files with 334 additions and 183 deletions.
83 changes: 64 additions & 19 deletions lib/compiler/src/v3_core.erl
Original file line number Diff line number Diff line change
Expand Up @@ -2025,10 +2025,18 @@ is_safe(_) -> false.
%% fold_match(MatchExpr, Pat) -> {MatchPat,Expr}.
%% Fold nested matches into one match with aliased patterns.

fold_match({match,L,P0,E0}, P) ->
{P1,E1} = fold_match(E0, P),
{{match,L,P0,P1},E1};
fold_match(E, P) -> {P,E}.
%% The first argument is the (possibly nested) {match,Line,Pat,Expr}
%% term and the second is the pattern the whole chain is matched
%% against. The result is {MatchPat,Expr}, where Expr is the innermost
%% non-match expression and MatchPat nests the collected patterns so
%% that the pattern closest to Expr ends up outermost and the initial
%% pattern ends up innermost.
fold_match(MatchExpr, Pat) ->
    fold_match_1(MatchExpr, [{0,Pat}]).

%% Descend through nested match expressions, pushing each
%% {Line,Pattern} pair onto the stack. The initial pattern was pushed
%% with line 0 and therefore always sits at the bottom of the stack.
fold_match_1({match,Line,Pat,Expr}, Stack) ->
    fold_match_1(Expr, [{Line,Pat}|Stack]);
fold_match_1(Expr, Stack) ->
    {fold_match_2(Stack),Expr}.

%% Rebuild a nested match pattern from the stack. The line number of
%% the bottom element is never used.
fold_match_2([{_Line,Innermost}]) ->
    Innermost;
fold_match_2([{Line,Pat}|Rest]) ->
    {match,Line,Pat,fold_match_2(Rest)}.

%% pattern(Pattern, State) -> {CorePat,[PreExp],State}.
%% Transform a pattern by removing line numbers. We also normalise
Expand Down Expand Up @@ -2191,17 +2199,26 @@ pat_alias(#c_alias{var=#c_var{name=V1}=Var1,pat=P1},
pat_alias(#c_alias{var=#c_var{}=Var,pat=P1}, P2) ->
#c_alias{var=Var,pat=pat_alias(P1, P2)};

pat_alias(#imap{es=Es1}=M, #imap{es=Es2}) ->
M#imap{es=pat_alias_map_pairs(Es1 ++ Es2)};
pat_alias(#imap{es=LEs}=M, #imap{es=REs0}) ->
[E0|REs] = REs0,
#imappair{anno=#a{anno=Anno}=A} = E0,
E = E0#imappair{anno=A#a{anno=[parallel_match|Anno]}},
M#imap{es=LEs++[E|REs]};

pat_alias(P1, #c_var{}=Var) ->
#c_alias{var=Var,pat=P1};
pat_alias(P1, #c_alias{pat=P2}=Alias) ->
Alias#c_alias{pat=pat_alias(P1, P2)};

pat_alias(#ibinary{segments=[]}=P, #ibinary{segments=[]}) ->
P;
pat_alias(#ibinary{segments=[_|_]=Segs1}=P, #ibinary{segments=[S0|Segs2]}) ->
#ibitstr{anno=#a{anno=Anno}=A} = S0,
S = S0#ibitstr{anno=A#a{anno=[parallel_match|Anno]}},
P#ibinary{segments=Segs1++[S|Segs2]};

pat_alias(P1, P2) ->
%% Aliases between binaries are not allowed, so the only
%% legal patterns that remain are data patterns.
%% The only legal patterns that remain are data patterns.
case cerl:is_data(P1) andalso cerl:is_data(P2) of
false -> throw(nomatch);
true -> ok
Expand Down Expand Up @@ -3635,9 +3652,14 @@ split_pat(#c_binary{segments=Segs0}=Bin, St0) ->
case split_bin_segments(Segs0, Vars, St0, []) of
none ->
none;
{TailVar,Wrap,Bef,Aft,St} ->
{size_var,TailVar,Wrap,Bef,Aft,St} ->
BefBin = Bin#c_binary{segments=Bef},
{BefBin,{split,[TailVar],Wrap,Bin#c_binary{segments=Aft},nil},St}
{BefBin,{split,[TailVar],Wrap,Bin#c_binary{segments=Aft},nil},St};
{parallel_match,Bef,Aft,St1} ->
{BinVar,St} = new_var(St1),
BefBin = #c_alias{var=BinVar,pat=Bin#c_binary{segments=Bef}},
Wrap = fun(Body) -> Body end,
{BefBin,{split,[BinVar],Wrap,Bin#c_binary{segments=Aft},nil},St}
end;
split_pat(#c_map{es=Es}=Map, St) ->
split_map_pat(Es, Map, St, []);
Expand All @@ -3657,7 +3679,21 @@ split_pat(Data, St0) ->
Es = cerl:data_es(Data),
split_data(Es, Type, St0, []).

split_map_pat([#c_map_pair{key=Key,val=Val}=E0|Es], Map0, St0, Acc) ->
%% split_map_pat(Pairs, Map, State, Acc) -> none | {BeforePat,Split,State}.
%%  Scan the pairs of a map pattern. Acc holds the pairs already
%%  visited, in reverse order. When a pair annotated with
%%  parallel_match is found, the map pattern is split at that pair:
%%  the pairs before it stay in the first pattern, which is aliased
%%  to a fresh variable, and the marked pair (with the annotation
%%  removed) plus all following pairs form a second map pattern that
%%  is matched against that variable. Unmarked pairs are handed to
%%  split_map_pat_1/5. Returns none when there are no pairs left.
split_map_pat([#c_map_pair{anno=PairAnno}=Pair0|Rest], Map, St0, Acc) ->
    case member(parallel_match, PairAnno) of
        false ->
            split_map_pat_1(Pair0, Rest, Map, St0, Acc);
        true ->
            %% Remove the marker so that the second pattern is not
            %% split at this pair again.
            Pair = Pair0#c_map_pair{anno=PairAnno -- [parallel_match]},
            {MapVar,St} = new_var(St0),
            Before = #c_alias{var=MapVar,pat=Map#c_map{es=reverse(Acc)}},
            After = Map#c_map{es=[Pair|Rest]},
            %% No extra code needs to be wrapped around the body.
            Identity = fun(Body) -> Body end,
            {Before,{split,[MapVar],Identity,After,nil},St}
    end;
split_map_pat([], _, _, _) -> none.

split_map_pat_1(#c_map_pair{key=Key,val=Val}=E0, Es, Map0, St0, Acc) ->
case eval_map_key(Key, E0, Es, Map0, St0) of
none ->
case split_pat(Val, St0) of
Expand All @@ -3673,8 +3709,7 @@ split_map_pat([#c_map_pair{key=Key,val=Val}=E0|Es], Map0, St0, Acc) ->
BefMap0 = Map0#c_map{es=reverse(Acc)},
BefMap = #c_alias{var=MapVar,pat=BefMap0},
{BefMap,Split,St1}
end;
split_map_pat([], _, _, _) -> none.
end.

eval_map_key(#c_var{}, _E, _Es, _Map, _St) ->
none;
Expand Down Expand Up @@ -3719,7 +3754,19 @@ split_data([E|Es0], Type, St0, Acc) ->
end;
split_data([], _, _, _) -> none.

split_bin_segments([#c_bitstr{val=Val,size=Size}=S0|Segs], Vars0, St0, Acc) ->
%% split_bin_segments(Segments, Vars, State, Acc) ->
%%       none | {parallel_match,Before,After,State} | SplitResult.
%%  Scan the segments of a binary pattern. Acc holds the segments
%%  already visited, in reverse order. A segment annotated with
%%  parallel_match marks where a second, aliased binary pattern
%%  begins; the segments before it and the segments from it onwards
%%  (with the annotation removed from the marked segment) are returned
%%  separately. Unmarked segments are handed to
%%  split_bin_segments_1/5. Returns none when the first argument is
%%  not a list starting with a #c_bitstr{} segment, for example when
%%  all segments have been consumed.
split_bin_segments([#c_bitstr{anno=SegAnno}=Seg0|Rest], Vars, St, Acc) ->
    case member(parallel_match, SegAnno) of
        false ->
            split_bin_segments_1(Seg0, Rest, Vars, St, Acc);
        true ->
            %% Remove the marker so that the remaining pattern is not
            %% split at this segment again.
            Seg = Seg0#c_bitstr{anno=SegAnno -- [parallel_match]},
            {parallel_match,reverse(Acc),[Seg|Rest],St}
    end;
split_bin_segments(_, _, _, _) ->
    none.

split_bin_segments_1(#c_bitstr{val=Val,size=Size}=S0, Segs, Vars0, St0, Acc) ->
Vars = case Val of
#c_var{name=V} -> gb_sets:add(V, Vars0);
_ -> Vars0
Expand All @@ -3736,7 +3783,7 @@ split_bin_segments([#c_bitstr{val=Val,size=Size}=S0|Segs], Vars0, St0, Acc) ->
%% in the same pattern.
{TailVar,Tail,St} = split_tail_seg(S0, Segs, St0),
Wrap = fun(Body) -> Body end,
{TailVar,Wrap,reverse(Acc, [Tail]),[S0|Segs],St};
{size_var,TailVar,Wrap,reverse(Acc, [Tail]),[S0|Segs],St};
false ->
split_bin_segments(Segs, Vars, St0, [S0|Acc])
end;
Expand All @@ -3748,10 +3795,8 @@ split_bin_segments([#c_bitstr{val=Val,size=Size}=S0|Segs], Vars0, St0, Acc) ->
{SizeVar,St2} = new_var(St1),
S = S0#c_bitstr{size=SizeVar},
{Wrap,St3} = split_wrap(SizeVar, Size, St2),
{TailVar,Wrap,reverse(Acc, [Tail]),[S|Segs],St3}
end;
split_bin_segments(_, _, _, _) ->
none.
{size_var,TailVar,Wrap,reverse(Acc, [Tail]),[S|Segs],St3}
end.

split_tail_seg(#c_bitstr{anno=A}=S, Segs, St0) ->
{TailVar,St} = new_var(St0),
Expand Down
97 changes: 93 additions & 4 deletions lib/compiler/test/bs_match_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@
bad_phi_paths/1,many_clauses/1,
combine_empty_segments/1,hangs_forever/1,
bs_saved_position_units/1,empty_matches/1,
trim_bs_start_match_resume/1]).
trim_bs_start_match_resume/1,
binary_aliases/1]).

-export([coverage_id/1,coverage_external_ignore/2]).

Expand Down Expand Up @@ -89,7 +90,8 @@ groups() ->
exceptions_after_match_failure,bad_phi_paths,
many_clauses,combine_empty_segments,hangs_forever,
bs_saved_position_units,empty_matches,
trim_bs_start_match_resume]}].
trim_bs_start_match_resume,
binary_aliases]}].

init_per_suite(Config) ->
test_lib:recompile(?MODULE),
Expand Down Expand Up @@ -2495,8 +2497,6 @@ trim_bs_start_match_resume_1(<<Context/binary>>) ->
_ = id(Context),
Context.

id(I) -> I.

expand_and_squeeze(Config) when is_list(Config) ->
%% UTF8 literals are expanded and then squeezed into integer16
ensure_squeezed(16, [?Q("<<$á/utf8,_/binary>>"),
Expand Down Expand Up @@ -2634,3 +2634,92 @@ many_clauses(_Config) ->

one_clause(I) ->
?Q(<<"{_@I@,<<L:8,Val:L>>} -> _@I@ + Val">>).

%% GH-6348/OTP-18297: Allow aliases for binaries.
-record(ba_foo, {a,b,c}).

%% Test aliasing of binary patterns with `=`, both in fun heads and in
%% fun bodies. Each FN below exercises one shape of alias.
binary_aliases(_Config) ->
%% Two binary patterns aliased directly in a fun head.
F1 = fun(<<A:8>> = <<B:8>>) -> {A,B} end,
{42,42} = F1(id(<<42>>)),
{99,99} = F1(id(<<99>>)),

%% Binary aliases nested inside aliased record patterns.
F2 = fun(#ba_foo{a = <<X:8>>} = #ba_foo{a = <<Y:8>>}) -> {X,Y} end,
{255,255} = F2(id(#ba_foo{a = <<-1>>})),
{107,107} = F2(id(#ba_foo{a = <<107>>})),

%% Same as F2, but the two binary patterns use different segment sizes.
F3 = fun(#ba_foo{a = <<X:8>>} = #ba_foo{a = <<Y:4,Z:4>>}) -> {X,Y,Z} end,
{255,15,15} = F3(id(#ba_foo{a = <<-1>>})),
{16#5c,16#5,16#c} = F3(id(#ba_foo{a = <<16#5c>>})),

%% A binary pattern aliased with a tuple pattern. The first clause
%% does not match the input used here, so the second clause is taken.
F4 = fun([<<A:8>> = {C,D} = <<B:8>>]) ->
{A,B,C,D};
(L) ->
lists:sum(L)
end,
6 = F4(id([1,2,3])),

%% Chained matches in a body, with a plain variable between the
%% binary patterns.
F5 = fun(Val) ->
<<A:8>> = X = <<B:8>> = Val,
{A,B,X}
end,
{42,42,<<42>>} = F5(id(<<42>>)),

%% Here the right-hand side is a binary construction, not a pattern.
F6 = fun(X, Y) ->
<<A:8>> = <<X:4,Y:4>>,
A
end,
16#7c = F6(16#7, 16#c),
16#ed = F6(16#e, 16#d),

%% A and B each occur in more than one of the aliased patterns, so
%% all occurrences must agree; <<0>> matches while <<1>> is expected
%% to raise badmatch.
F7 = fun(Val) ->
(<<A:8>> = X) = (<<B:8>> = <<A:4,B:4>>) = Val,
{A,B,X}
end,
{0,0,<<0>>} = F7(id(<<0>>)),
{'EXIT',{{badmatch,<<1>>},_}} = catch F7(<<1>>),

%% Parenthesized aliases mixing variables and binary patterns.
F8 = fun(Val) ->
(<<A:8>> = X) = (Y = <<B:8>>) = Val,
{A,B,X,Y}
end,
{253,253,<<253>>,<<253>>} = F8(id(<<253>>)),

F9 = fun(Val) ->
(Z = <<A:8>> = X) = (Y = <<B:8>> = W) = Val,
{A,B,X,Y,Z,W}
end,
{201,201,<<201>>,<<201>>,<<201>>,<<201>>} = F9(id(<<201>>)),

%% Aliases of empty binary patterns; a non-binary argument is
%% expected to raise badmatch.
F10 = fun(X) ->
<<>> = (<<>> = X)
end,
<<>> = F10(id(<<>>)),
{'EXIT',{{badmatch,42},_}} = catch F10(id(42)),

%% Three different segmentations of the same 32 bits.
F11 = fun(Bin) ->
<<A:8/bits,B:24/bits>> = <<C:16,D:16>> = <<E:8,F:8,G:8,H:8>> = Bin,
{A,B,C,D,E,F,G,H}
end,
{<<0>>,<<0,0,0>>, 0,0, 0,0,0,0} = F11(id(<<0:32>>)),
{<<16#ab>>,<<16#cdef57:24>>, 16#abcd,16#ef57, 16#ab,16#cd,16#ef,16#57} =
F11(id(<<16#abcdef57:32>>)),

%% Binary aliases nested inside aliased map patterns.
F12 = fun(#{key := <<X:8>>} = #{key := <<Y:8>>}) -> {X,Y} end,
{255,255} = F12(id(#{key => <<-1>>})),
{209,209} = F12(id(#{key => <<209>>})),

%% Size is bound by the rightmost pattern and used as a segment size
%% in the two patterns to its left.
F13 = fun(Bin) ->
<<_:8,A:Size>> = <<_:8,B:Size/bits>> = <<Size:8,_/bits>> = Bin,
{Size,A,B}
end,
{0,0,<<>>} = F13(id(<<0>>)),
{1,1,<<1:1>>} = F13(id(<<1,1:1>>)),
{8,42,<<42>>} = F13(id(<<8,42>>)),

ok.


%%% Common utilities.

%% Return the argument unchanged. Test cases pass their inputs through
%% this function (presumably to keep the compiler from treating them
%% as known constants -- TODO confirm).
id(Term) -> Term.

16 changes: 14 additions & 2 deletions lib/compiler/test/map_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@
%% miscellaneous
t_conflicting_destinations/1,
t_cse_assoc/1,
shared_key_tuples/1
shared_key_tuples/1,
map_aliases/1
]).

-define(badmap(V, F, Args), {'EXIT', {{badmap,V}, [{maps,F,Args,_}|_]}}).
Expand Down Expand Up @@ -163,7 +164,8 @@ all() ->
%% miscellaneous
t_conflicting_destinations,
t_cse_assoc,
shared_key_tuples
shared_key_tuples,
map_aliases
].

groups() -> [].
Expand Down Expand Up @@ -2567,6 +2569,16 @@ shared_key_tuples(_Config) ->
decimal(Int) ->
#{type => decimal, int => Int, exp => 0}.

%% GH-6348/OTP-18297: Extend parallel matching of maps.
%% Test chained map matches in a body where a variable bound by the
%% right-hand pattern is used as a key in the pattern to its left.
map_aliases(_Config) ->
%% K is bound by #{k := {a,K}} and then used as the key in #{K := V}.
F1 = fun(M) ->
#{K := V} = #{k := {a,K}} = M,
V
end,
value = F1(#{k => {a,key}, key => value}),

ok.

%% aux

rand_terms(0) -> [];
Expand Down
Loading

0 comments on commit ec6c392

Please sign in to comment.