From 879ac0c741d56141be97c1820ee96c6b9e36a3aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Tue, 20 Feb 2024 15:39:56 +0100 Subject: [PATCH] Generate valid Wasm code - Ensure that locals are always explicitly initialized before being used - Do not declare a memory if not used --- compiler/bin-wasm_of_ocaml/compile.ml | 1 - compiler/lib/wasm/wa_code_generation.ml | 3 +- compiler/lib/wasm/wa_core_target.ml | 2 + compiler/lib/wasm/wa_gc_target.ml | 2 + compiler/lib/wasm/wa_generate.ml | 1 + compiler/lib/wasm/wa_initialize_locals.ml | 112 +++++++++++++++++++++ compiler/lib/wasm/wa_initialize_locals.mli | 5 + compiler/lib/wasm/wa_target_sig.ml | 6 ++ compiler/lib/wasm/wa_wat_output.ml | 11 +- 9 files changed, 138 insertions(+), 5 deletions(-) create mode 100644 compiler/lib/wasm/wa_initialize_locals.ml create mode 100644 compiler/lib/wasm/wa_initialize_locals.mli diff --git a/compiler/bin-wasm_of_ocaml/compile.ml b/compiler/bin-wasm_of_ocaml/compile.ml index 2191b5e9d..c70a990f2 100644 --- a/compiler/bin-wasm_of_ocaml/compile.ml +++ b/compiler/bin-wasm_of_ocaml/compile.ml @@ -64,7 +64,6 @@ let common_binaryen_options = ; "--enable-nontrapping-float-to-int" ; "--enable-strings" ; "-g" - ; "-n" ] let link runtime_files input_file output_file = diff --git a/compiler/lib/wasm/wa_code_generation.ml b/compiler/lib/wasm/wa_code_generation.ml index 6ef0daca1..fd9061acc 100644 --- a/compiler/lib/wasm/wa_code_generation.ml +++ b/compiler/lib/wasm/wa_code_generation.ml @@ -446,7 +446,8 @@ let drop e = match e with | W.Seq (l, e') -> let* b = is_small_constant e' in - if b then instrs l else instr (Drop e) + let* () = instrs l in + if b then return () else instr (Drop e') | _ -> instr (Drop e) let push e = diff --git a/compiler/lib/wasm/wa_core_target.ml b/compiler/lib/wasm/wa_core_target.ml index fc11dbf30..ddec0394a 100644 --- a/compiler/lib/wasm/wa_core_target.ml +++ b/compiler/lib/wasm/wa_core_target.ml @@ -632,6 +632,8 @@ let handle_exceptions 
~result_typ ~fall_through ~context body x exn_handler = exn_handler ~result_typ ~fall_through ~context ) ] +let post_process_function_body ~param_count:_ ~locals:_ instrs = instrs (* Core target: returns the body unchanged; [param_count] and [locals] are ignored. *) + let entry_point ~context:_ ~toplevel_fun = let code = let declare_global name = diff --git a/compiler/lib/wasm/wa_gc_target.ml b/compiler/lib/wasm/wa_gc_target.ml index 737177c6d..ec98462cc 100644 --- a/compiler/lib/wasm/wa_gc_target.ml +++ b/compiler/lib/wasm/wa_gc_target.ml @@ -1500,6 +1500,8 @@ let handle_exceptions ~result_typ ~fall_through ~context body x exn_handler = in exn_handler ~result_typ ~fall_through ~context) +let post_process_function_body = Wa_initialize_locals.f (* GC target: post-processing is delegated to [Wa_initialize_locals.f]. *) + let entry_point ~context ~toplevel_fun = let code = let* f = diff --git a/compiler/lib/wasm/wa_generate.ml b/compiler/lib/wasm/wa_generate.ml index 4bda507ba..3782eefc5 100644 --- a/compiler/lib/wasm/wa_generate.ml +++ b/compiler/lib/wasm/wa_generate.ml @@ -971,6 +971,7 @@ module Generate (Target : Wa_target_sig.S) = struct (fun ~result_typ ~fall_through ~context -> translate_branch result_typ fall_through (-1) cont context stack_ctx)) in + let body = post_process_function_body ~param_count ~locals body in W.Function { name = (match name_opt with diff --git a/compiler/lib/wasm/wa_initialize_locals.ml b/compiler/lib/wasm/wa_initialize_locals.ml new file mode 100644 index 000000000..0f0931135 --- /dev/null +++ b/compiler/lib/wasm/wa_initialize_locals.ml @@ -0,0 +1,112 @@ +open Stdlib +(* Scan state. [initialized] is the set of local indices marked as set at the current point of the scan; [uninitialized] is a ref, shared between forked contexts, that accumulates the locals read while absent from [initialized]. *) +type ctx = + { mutable initialized : IntSet.t + ; uninitialized : IntSet.t ref + } +(* Add local [i] to the [initialized] set of [ctx]. *) +let mark_initialized ctx i = ctx.initialized <- IntSet.add i ctx.initialized +(* Build a fresh record with the same field values: later [mark_initialized] calls on the copy leave the original untouched, while the [uninitialized] ref is the same one and stays shared. *) +let fork_context { initialized; uninitialized } = { initialized; uninitialized } +(* Record local [i] in the shared [uninitialized] set when it is not in [ctx.initialized]. *) +let check_initialized ctx i = + if not (IntSet.mem i ctx.initialized) + then ctx.uninitialized := IntSet.add i !(ctx.uninitialized) +(* Walk expression [e]. Operands are scanned first; [LocalGet] is checked with [check_initialized], [LocalTee] marks its local only after its operand has been scanned, and [BlockExpr] / [Seq] bodies go through [scan_instructions] (a [Seq] is scanned as its instruction list followed by a [Push] of its final expression). *) +let rec scan_expression ctx e = + match e with + | Wa_ast.Const _ | ConstSym _ | GlobalGet _ | Pop _ | RefFunc _ | RefNull _ -> () + | UnOp (_, e') + 
| I32WrapI64 e' + | I64ExtendI32 (_, e') + | F32DemoteF64 e' + | F64PromoteF32 e' + | Load (_, e') + | Load8 (_, _, e') + | MemoryGrow (_, e') + | RefI31 e' + | I31Get (_, e') + | ArrayLen e' + | StructGet (_, _, _, e') + | RefCast (_, e') + | RefTest (_, e') + | Br_on_cast (_, _, _, e') + | Br_on_cast_fail (_, _, _, e') + | ExternInternalize e' + | ExternExternalize e' -> scan_expression ctx e' + | BinOp (_, e', e'') + | ArrayNew (_, e', e'') + | ArrayNewData (_, _, e', e'') + | ArrayGet (_, _, e', e'') + | RefEq (e', e'') -> + scan_expression ctx e'; + scan_expression ctx e'' + | LocalGet i -> check_initialized ctx i + | LocalTee (i, e') -> + scan_expression ctx e'; + mark_initialized ctx i + | Call_indirect (_, e', l) | Call_ref (_, e', l) -> + scan_expressions ctx l; + scan_expression ctx e' + | Call (_, l) | ArrayNewFixed (_, l) | StructNew (_, l) -> scan_expressions ctx l + | BlockExpr (_, l) -> scan_instructions ctx l + | Seq (l, e') -> scan_instructions ctx (l @ [ Push e' ]) +(* Scan every expression of [l] in list order, all with the same [ctx]. *) +and scan_expressions ctx l = List.iter ~f:(fun e -> scan_expression ctx e) l +(* Walk instruction [i]. [LocalSet] marks its local after the assigned expression has been scanned; the bodies of [Loop], [Block], [If] and [Try] (including every catch handler) are each scanned through [scan_instructions], starting from the current [ctx]. *) +and scan_instruction ctx i = + match i with + | Wa_ast.Drop e + | GlobalSet (_, e) + | Br (_, Some e) + | Br_if (_, e) + | Br_table (e, _, _) + | Throw (_, e) + | Return (Some e) + | Push e -> scan_expression ctx e + | Store (_, e, e') | Store8 (_, e, e') | StructSet (_, _, e, e') -> + scan_expression ctx e; + scan_expression ctx e' + | LocalSet (i, e) -> + scan_expression ctx e; + mark_initialized ctx i + | Loop (_, l) | Block (_, l) -> scan_instructions ctx l + | If (_, e, l, l') -> + scan_expression ctx e; + scan_instructions ctx l; + scan_instructions ctx l' + | Try (_, body, catches, catch_all) -> + scan_instructions ctx body; + List.iter ~f:(fun (_, l) -> scan_instructions ctx l) catches; + Option.iter ~f:(fun l -> scan_instructions ctx l) catch_all + | CallInstr (_, l) | Return_call (_, l) -> scan_expressions ctx l + | Br (_, None) | Return None | Rethrow _ | Nop -> () + | ArraySet (_, e, e', e'') 
-> + scan_expression ctx e; + scan_expression ctx e'; + scan_expression ctx e'' + | Return_call_indirect (_, e', l) | Return_call_ref (_, e', l) -> + scan_expressions ctx l; + scan_expression ctx e' +(* Scan the instruction list [l] in a forked context, so locals marked inside [l] are not treated as initialized by whatever is scanned after [l]. NOTE(review): this presumably mirrors Wasm validation, where the initialization status of a local does not outlive the enclosing block - confirm against the spec. *) +and scan_instructions ctx l = + let ctx = fork_context ctx in + List.iter ~f:(fun i -> scan_instruction ctx i) l +(* [f ~param_count ~locals instrs] scans [instrs] and returns it prefixed with one [LocalSet (i, RefI31 (Const (I32 0l)))] for each local index recorded by [check_initialized]. Indices below [param_count] start out marked initialized, as do locals whose type is I32, I64, F32, F64 or a nullable Ref; element [i] of [locals] has index [i + param_count]. NOTE(review): the dummy i31 value presumably requires every non-nullable local type to accept an i31 reference - confirm. *) +let f ~param_count ~locals instrs = + let ctx = { initialized = IntSet.empty; uninitialized = ref IntSet.empty } in + for i = 0 to param_count - 1 do + mark_initialized ctx i + done; + List.iteri + ~f:(fun i typ -> + match (typ : Wa_ast.value_type) with + | I32 | I64 | F32 | F64 | Ref { nullable = true; _ } -> + mark_initialized ctx (i + param_count) + | Ref { nullable = false; _ } -> ()) + locals; + scan_instructions ctx instrs; + List.map + ~f:(fun i -> Wa_ast.LocalSet (i, RefI31 (Const (I32 0l)))) + (IntSet.elements !(ctx.uninitialized)) + @ instrs diff --git a/compiler/lib/wasm/wa_initialize_locals.mli b/compiler/lib/wasm/wa_initialize_locals.mli new file mode 100644 index 000000000..53e752069 --- /dev/null +++ b/compiler/lib/wasm/wa_initialize_locals.mli @@ -0,0 +1,5 @@ +val f : + param_count:int + -> locals:Wa_ast.value_type list + -> Wa_ast.instruction list + -> Wa_ast.instruction list diff --git a/compiler/lib/wasm/wa_target_sig.ml b/compiler/lib/wasm/wa_target_sig.ml index ee43e854a..5083d3946 100644 --- a/compiler/lib/wasm/wa_target_sig.ml +++ b/compiler/lib/wasm/wa_target_sig.ml @@ -294,6 +294,12 @@ module type S = sig -> unit Wa_code_generation.t) -> unit Wa_code_generation.t + val post_process_function_body : + param_count:int + -> locals:Wa_ast.value_type list + -> Wa_ast.instruction list + -> Wa_ast.instruction list + val entry_point : context:Wa_code_generation.context -> toplevel_fun:Wa_ast.var diff --git a/compiler/lib/wasm/wa_wat_output.ml b/compiler/lib/wasm/wa_wat_output.ml index 56868977e..9f67ed28c 100644 --- a/compiler/lib/wasm/wa_wat_output.ml +++ b/compiler/lib/wasm/wa_wat_output.ml @@ -601,9 +601,14 @@ let f ch fields = (List 
(Atom "module" :: (List.concat (List.map ~f:import fields) - @ [ List - [ Atom "memory"; Atom (string_of_int ((heap_base + 0xffff) / 0x10000)) ] - ] + @ (if Code.Var.Map.is_empty addresses + then [] + else + [ List + [ Atom "memory" + ; Atom (string_of_int ((heap_base + 0xffff) / 0x10000)) + ] + ]) @ funct_table @ funct_decl @ other_fields)))