From 5c82ff50300e9fbec0e6dc236c6cd7cf5062b915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Tue, 5 Apr 2022 16:55:03 +0200 Subject: [PATCH 1/6] Dialyzer: Remove `race_conditions` option It is no longer supported in Erlang 25. --- rebar.config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rebar.config b/rebar.config index eadee12d..560a27de 100644 --- a/rebar.config +++ b/rebar.config @@ -12,8 +12,7 @@ warnings_as_errors]}. {dialyzer, [{plt_extra_apps, [edoc, eunit, inets, mnesia, proper, ssl, xmerl]}, - {warnings, [race_conditions, - underspecs, + {warnings, [underspecs, unknown, unmatched_returns]}]}. From f36e3d0afaa568695ad46fad190c24b1f3419643 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Thu, 7 Apr 2022 10:03:02 +0200 Subject: [PATCH 2/6] khepri_fun: Read the "StrT" chunk This chunk contains a list of concatenated binaries. Instructions can reference strings inside this "StrT" chunk. Instructions are responsible for providing the offset in this chunk where to find the string, and the length of that string. Here is an example from the `transactions` testsuite. The code calls `http_util:maybe_add_brackets/2` indirectly. Here is an extract from this function source code at the time of this commit: ```erlang ... maybe_add_brackets(Addr, true) when is_list(Addr) -> case is_ipv6_address(Addr) of true -> [$[|Addr] ++ "]"; ... ``` The code uses the $[ character and the "]" string. They are encoded in the module's "StrT" chunk as concatenated binaries: ```erlang <<"[]">> %% <<91, 93>> ``` In its compiled form, the code uses the `bs_create_bin` instruction. This instruction references the two characters, "[" and "]", in the "StrT" chunk. For instance, the reference to "]": ```erlang %% Part of the instruction's arguments: {atom, string}, _Seg, _Unit, _Flags, {u, 1 = Offset} = _Val, %% <- The offset into the "StrT" chunk. 
{integer, 1 = Length} = _Size %% <- The length of the binary to extract %% from the "StrT" chunk. ``` It's then easy to call `binary:part/2` to extract the wanted string from the "StrT" chunk: ```erlang String = binary:part(StrTChunk, {Offset, Length}). ``` Taking the example from the `bs_create_bin` instruction above: ```erlang <<"]">> = binary:part(<<"[]">>, {1, 1}). ``` The instruction's arguments above become: ```erlang %% Part of the instruction's arguments: {atom, string}, _Seg, _Unit, _Flags, {string, <<"]">>} = _Val, {integer, 1 = Length} = _Size ``` --- src/khepri_fun.erl | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/khepri_fun.erl b/src/khepri_fun.erl index beb98608..db44ad9a 100644 --- a/src/khepri_fun.erl +++ b/src/khepri_fun.erl @@ -158,7 +158,8 @@ code = [] :: [#function{}], %% Added in this module to stored the decoded "Line" %% chunk. - lines :: #lines{} | undefined}). + lines :: #lines{} | undefined, + strings :: binary() | undefined}). -type ensure_instruction_is_permitted_fun() :: fun((Instruction :: beam_instr()) -> ok). 
@@ -248,6 +249,7 @@ fun((#{calls := #{Call :: mfa() => true}, functions = #{} :: #{mfa() => #function{}}, lines_in_progress :: #lines{} | undefined, + strings_in_progress :: binary() | undefined, mfa_in_progress :: mfa() | undefined, function_in_progress :: atom() | undefined, next_label = 1 :: label(), @@ -1345,19 +1347,23 @@ erl_eval_fun_to_asm(Module, Name, Arity, [{Bindings, _, _, Clauses}]) disassemble_module(Module, #state{checksums = Checksums} = State) -> case Checksums of #{Module := Checksum} -> - {#beam_file_ext{lines = Lines} = BeamFileRecord, + {#beam_file_ext{lines = Lines, + strings = Strings} = BeamFileRecord, Checksum} = disassemble_module1(Module, Checksum), - State1 = State#state{lines_in_progress = Lines}, + State1 = State#state{lines_in_progress = Lines, + strings_in_progress = Strings}, {BeamFileRecord, State1}; _ -> - {#beam_file_ext{lines = Lines} = BeamFileRecord, + {#beam_file_ext{lines = Lines, + strings = Strings} = BeamFileRecord, Checksum} = disassemble_module1(Module, undefined), ?assert(is_binary(Checksum)), Checksums1 = Checksums#{Module => Checksum}, State1 = State#state{checksums = Checksums1, - lines_in_progress = Lines}, + lines_in_progress = Lines, + strings_in_progress = Strings}, {BeamFileRecord, State1} end. @@ -1426,13 +1432,15 @@ do_disassemble(Beam) -> compile_info = CompileInfo, code = Code} = BeamFileRecord, Lines = get_and_decode_line_chunk(Module, Beam), + Strings = get_and_decode_string_chunk(Module, Beam), BeamFileRecordExt = #beam_file_ext{ module = Module, labeled_exports = LabeledExports, attributes = Attributes, compile_info = CompileInfo, code = Code, - lines = Lines}, + lines = Lines, + strings = Strings}, BeamFileRecordExt. %% The "Line" beam chunk decoding is based on the equivalent C code in ERTS. @@ -1538,6 +1546,17 @@ decode_line_chunk_names(<<>>, I, #lines{name_count = NameCount} = Lines) when I =:= NameCount -> Lines. 
+get_and_decode_string_chunk(Module, Beam) -> + case beam_lib:chunks(Beam, ["StrT"]) of + {ok, {Module, [{"StrT", Chunk}]}} -> + %% There is nothing to decode: the chunk is made of concatenated + %% binaries. The instruction knows the offset inside the chunk and + %% the length of the binary to extract. + Chunk; + _ -> + undefined + end. + %% See: erts/emulator/beam/beam_file.c, beamreader_read_tagged(). read_tagged( From 87adefd967074392a59a0f941d5187b1ecef9f1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Tue, 5 Apr 2022 18:49:50 +0200 Subject: [PATCH 3/6] khepri_fun: Support `bs_create_bin` instruction The instruction as decoded by `beam_disasm` needs some fixes: 1. Several integer arguments are wrapped in `{u, Integer}` and must be unwrapped. 2. Two arguments must be removed; they are only used to decode the following variable list of arguments and don't need to be retained in the decoded instruction. 3. The variable list of arguments needs to be patched like the first positional arguments (e.g. unwrapping integers). We also need to take strings in the "StrT" chunk and replace the reference by the actual string. --- src/khepri_fun.erl | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/khepri_fun.erl b/src/khepri_fun.erl index db44ad9a..828f8dbf 100644 --- a/src/khepri_fun.erl +++ b/src/khepri_fun.erl @@ -961,6 +961,18 @@ pass1_process_instructions( %% `beam_disasm' did not decode this instruction's field flags. Instruction = decode_field_flags(Instruction0, 8), pass1_process_instructions([Instruction | Rest], State, Result); +pass1_process_instructions( + [{bs_create_bin, + [{{f, _} = Fail, {u, Heap}, {u, Live}, {u, Unit}, Dst, _, _N, List}]} + | Rest], + State, + Result) when is_list(List) -> + %% `beam_disasm' decoded the instruction's arguments as a tuple inside a + %% list. They should be part of the instruction's tuple. 
Also, various + %% arguments are not wrapped/unwrapped correctly. + List1 = fix_create_bin_list(List, State), + Instruction = {bs_create_bin, Fail, Heap, Live, Unit, Dst, {list, List1}}, + pass1_process_instructions([Instruction | Rest], State, Result); pass1_process_instructions( [{bs_private_append, _, _, _, _, {field_flags, FF}, _} = Instruction0 | Rest], State, @@ -1673,6 +1685,33 @@ decode_field_flags({field_flags, FieldFlagsBitField}) -> FieldFlags = decode_field_flags(FieldFlagsBitField), {field_flags, FieldFlags}. +fix_create_bin_list( + [{atom, string} = Type, Seg, Unit, Flags, {u, Offset} = _Val, Size + | Args], + #state{strings_in_progress = Strings} = State) -> + Seg1 = fix_integer(Seg), + Unit1 = fix_integer(Unit), + Size1 = {integer, Length} = fix_integer(Size), + ?assertNotEqual(undefined, Strings), + Binary = binary:part(Strings, {Offset, Length}), + Val = {string, Binary}, + [Type, Seg1, Unit1, Flags, Val, Size1 | fix_create_bin_list(Args, State)]; +fix_create_bin_list( + [Type, Seg, Unit, Flags, Val, Size + | Args], + State) -> + Seg1 = fix_integer(Seg), + Unit1 = fix_integer(Unit), + Val1 = fix_integer(Val), + Size1 = fix_integer(Size), + [Type, Seg1, Unit1, Flags, Val1, Size1 | fix_create_bin_list(Args, State)]; +fix_create_bin_list([], _State) -> + []. + +fix_integer({u, U}) -> U; +fix_integer({i, I}) -> {integer, I}; +fix_integer(Other) -> Other. 
+ -spec ensure_instruction_is_permitted(Instruction, State) -> State when Instruction :: beam_instr(), @@ -1862,6 +1901,9 @@ pass2_process_instruction( pass2_process_instruction( {bs_append, _, _, _, _, _, _, _, _} = Instruction, State) -> replace_label(Instruction, 2, State); +pass2_process_instruction( + {bs_create_bin, _, _, _, _, _, _} = Instruction, State) -> + replace_label(Instruction, 2, State); pass2_process_instruction( {bs_init2, _, _, _, _, _, _} = Instruction, State) -> replace_label(Instruction, 2, State); From 175a7dfc63e133fbeaeec5bb516a2a8580735d67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Tue, 5 Apr 2022 18:50:03 +0200 Subject: [PATCH 4/6] Transactions: Allow `bs_create_bin` instruction --- src/khepri_tx.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/khepri_tx.erl b/src/khepri_tx.erl index c87827e4..f11538ae 100644 --- a/src/khepri_tx.erl +++ b/src/khepri_tx.erl @@ -472,6 +472,8 @@ ensure_instruction_is_permitted({bs_add, _, _, _}) -> ok; ensure_instruction_is_permitted({bs_append, _, _, _, _, _, _, _, _}) -> ok; +ensure_instruction_is_permitted({bs_create_bin, _, _, _, _, _, _}) -> + ok; ensure_instruction_is_permitted({bs_init2, _, _, _, _, _, _}) -> ok; ensure_instruction_is_permitted({bs_init_bits, _, _, _, _, _, _}) -> From e7695b6d8646bc497d4c4ee1fd19f14f1965faa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Wed, 6 Apr 2022 14:28:33 +0200 Subject: [PATCH 5/6] khepri_fun: Handle type-tagged beam registers There are two problems with them: 1. They are not correctly decoded by `beam_disasm`: the type is inside a tuple with two integers. It should have been the type only. 2. They can't be used directly in the `var_info` comment: only the register inside the type-tagged beam register tuple should be retained for that. 
We currently handle them for the following instructions: * `get_tuple_element` * `select_tuple_arity` * `get_map_elements` --- src/khepri_fun.erl | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/khepri_fun.erl b/src/khepri_fun.erl index 828f8dbf..343addec 100644 --- a/src/khepri_fun.erl +++ b/src/khepri_fun.erl @@ -1159,28 +1159,41 @@ pass1_process_instructions( State, Result) -> State1 = ensure_instruction_is_permitted(Instruction, State), + Src1 = fix_type_tagged_beam_register(Src), + Instruction1 = setelement(2, Instruction, Src1), + + Reg = get_reg_from_type_tagged_beam_register(Src1), Type = {t_tuple, Element + 1, false, #{}}, - VarInfo = {var_info, Src, [{type, Type}]}, + VarInfo = {var_info, Reg, [{type, Type}]}, Comment = {'%', VarInfo}, - pass1_process_instructions(Rest, State1, [Instruction, Comment | Result]); + + pass1_process_instructions(Rest, State1, [Instruction1, Comment | Result]); pass1_process_instructions( [{select_tuple_arity, Src, _, _} = Instruction | Rest], State, Result) -> State1 = ensure_instruction_is_permitted(Instruction, State), + Src1 = fix_type_tagged_beam_register(Src), + Instruction1 = setelement(2, Instruction, Src1), + + Reg = get_reg_from_type_tagged_beam_register(Src1), Type = {t_tuple, 0, false, #{}}, - VarInfo = {var_info, Src, [{type, Type}]}, + VarInfo = {var_info, Reg, [{type, Type}]}, Comment = {'%', VarInfo}, - pass1_process_instructions(Rest, State1, [Instruction, Comment | Result]); + pass1_process_instructions(Rest, State1, [Instruction1, Comment | Result]); pass1_process_instructions( [{get_map_elements, _Fail, Src, {list, _}} = Instruction | Rest], State, Result) -> State1 = ensure_instruction_is_permitted(Instruction, State), + Src1 = fix_type_tagged_beam_register(Src), + Instruction1 = setelement(3, Instruction, Src1), + + Reg = get_reg_from_type_tagged_beam_register(Src1), Type = {t_map, any, any}, - VarInfo = {var_info, Src, [{type, Type}]}, + VarInfo = 
{var_info, Reg, [{type, Type}]}, Comment = {'%', VarInfo}, - pass1_process_instructions(Rest, State1, [Instruction, Comment | Result]); + pass1_process_instructions(Rest, State1, [Instruction1, Comment | Result]); pass1_process_instructions( [{put_map_assoc, _Fail, Src, _Dst, _Live, {list, _}} = Instruction | Rest], State, @@ -1712,6 +1725,12 @@ fix_integer({u, U}) -> U; fix_integer({i, I}) -> {integer, I}; fix_integer(Other) -> Other. +fix_type_tagged_beam_register({tr, Reg, {Type, _, _}}) -> {tr, Reg, Type}; +fix_type_tagged_beam_register(Other) -> Other. + +get_reg_from_type_tagged_beam_register({tr, Reg, _}) -> Reg; +get_reg_from_type_tagged_beam_register(Reg) -> Reg. + -spec ensure_instruction_is_permitted(Instruction, State) -> State when Instruction :: beam_instr(), From 66a8e2b899fc34196fd474af43a6c581d1781640 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Wed, 6 Apr 2022 14:31:08 +0200 Subject: [PATCH 6/6] khepri_fun: Handle `erl_eval` source code structure changes In Erlang 25, it looks like the source code of the function in the anonymous function environment is stored slightly differently. It's easy to support both the old and the new tuples and extract that source code. --- src/khepri_fun.erl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/khepri_fun.erl b/src/khepri_fun.erl index 343addec..63d4ad70 100644 --- a/src/khepri_fun.erl +++ b/src/khepri_fun.erl @@ -1335,9 +1335,18 @@ find_function( BeamFileRecord :: #beam_file_ext{}. %% @private +erl_eval_fun_to_asm(Module, Name, Arity, [{_, Bindings, _, _, _, Clauses}]) + when Bindings =:= [] orelse %% Erlang is using a list for bindings, + Bindings =:= #{} -> %% but Elixir is using a map. + %% Erlang starting from 25. + erl_eval_fun_to_asm1(Module, Name, Arity, Clauses); erl_eval_fun_to_asm(Module, Name, Arity, [{Bindings, _, _, Clauses}]) when Bindings =:= [] orelse %% Erlang is using a list for bindings, Bindings =:= #{} -> %% but Elixir is using a map. 
+ %% Erlang up to 24. + erl_eval_fun_to_asm1(Module, Name, Arity, Clauses). + +erl_eval_fun_to_asm1(Module, Name, Arity, Clauses) -> %% We construct an abstract form based on the `env' of the lambda loaded %% by `erl_eval'. Anno = erl_anno:from_term(1),