Skip to content

Commit

Permalink
adds subinstruction contraction to improve the ghidra lifter output
Browse files Browse the repository at this point in the history
The ghidra lifter produces several basic blocks per
instruction. This simple optimization contracts (collapses into a
single block) basic blocks that originate from different
subinstructions of the same instruction (when it preserves semantics,
of course).
  • Loading branch information
ivg committed Mar 9, 2022
1 parent fb0414c commit f8f63cf
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 6 deletions.
76 changes: 72 additions & 4 deletions plugins/bil/bil_ir.ml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ include Self()
(* An intermediate basic block accumulated while building the IR graph. *)
type blk = {
(* the label that identifies the block; jump destinations are matched
   against it when blocks are contracted *)
name : Theory.Label.t;
(* NOTE(review): appears to mark a block whose name must be preserved --
   [relink] renames the entry block only when [keep] is false; confirm *)
keep : bool;
(* set by [relink] to the result of [is_subinstruction] for the new label;
   every other constructor in this file sets it to [false]. Only blocks
   with both [keep] and [weak] set are candidates for contraction *)
weak : bool;
(* the definitions of the block (presumably kept in reverse order, see
   [contract] -- TODO confirm) *)
defs : def term list;
(* the jumps that terminate the block *)
jmps : jmp term list;
} [@@deriving bin_io]
Expand Down Expand Up @@ -54,13 +55,72 @@ module BIR = struct
Blk.Builder.result b :: blks |>
List.rev

(* [resolve jmp] is the resolved form of the destination of [jmp],
   or [None] when the jump has no destination. *)
let resolve jmp =
  Option.map (Jmp.dst jmp) ~f:Jmp.resolve

(* [references blks blocks] maps each tid from the set [blks] to the
   number of jumps in [blocks] whose destination resolves to that tid.
   Tids that are never targeted are absent from the resulting map, and
   destinations outside of [blks] are ignored. *)
let references blks =
  (* accounts for a single jump in the reference counters *)
  let count refs jmp = match resolve jmp with
    | Some (First tid) when Set.mem blks tid ->
      Map.update refs tid ~f:(function
          | None -> 1
          | Some n -> n + 1)
    | _ -> refs in
  List.fold ~init:Tid.Map.empty ~f:(fun refs {jmps} ->
      List.fold jmps ~init:refs ~f:count)

(* [names blks] is the set of the names of all blocks in [blks]. *)
let names blks =
  List.fold blks ~init:Tid.Set.empty ~f:(fun seen {name} ->
      Set.add seen name)

(* [single_dst jmps] is [Some tid] when [jmps] consists of exactly one
   jump and its destination resolves to the tid [tid]; otherwise [None]. *)
let single_dst jmps = match jmps with
  | [jmp] ->
    (match resolve jmp with
     | Some (First tid) -> Some tid
     | _ -> None)
  | [] | _ :: _ :: _ -> None

(* [is_sub blk] holds when both the [keep] and [weak] flags of [blk]
   are set. *)
let is_sub = function
  | {keep = true; weak = true} -> true
  | _ -> false

(* [can_contract refs b1 b2] holds when [b2] may be merged into [b1]:
   both blocks satisfy [is_sub], the only jump of [b1] leads to [b2],
   and according to [refs] this is the only jump that targets [b2]. *)
let can_contract refs b1 b2 =
  let referenced_once dst = match Map.find refs dst with
    | Some 1 -> true
    | _ -> false in
  is_sub b1 && is_sub b2 && match single_dst b1.jmps with
  | Some dst -> Tid.equal dst b2.name && referenced_once dst
  | None -> false

(* [contract blks] merges a non-empty chain of blocks into one block.

   pre: can_contract b1 b2 /\ can_contract b2 b3 /\ ...

   The result takes its name and flags from the first block and its
   jumps from the last one. Its definitions are those of the last block
   followed by those of each preceding block, which is the same list as
   reversing the concatenation of the individually reversed [defs].
   Fails with an assertion if [blks] is empty. *)
let contract blks = match List.hd blks, List.last blks with
  | Some first, Some last ->
    let defs = List.concat_map (List.rev blks) ~f:(fun {defs} -> defs) in
    {first with defs; jmps = last.jmps}
  | _ -> assert false

(* [normalize blks] orders the blocks by name, splits them into maximal
   runs of consecutive blocks in which every adjacent pair satisfies
   [can_contract], and contracts each run into a single block. *)
let normalize blks =
  let refs = references (names blks) blks in
  let by_name b1 b2 = Tid.compare b1.name b2.name in
  (* a new group starts wherever two neighbours cannot be merged *)
  let break b1 b2 = not (can_contract refs b1 b2) in
  blks
  |> List.sort ~compare:by_name
  |> List.group ~break
  |> List.map ~f:contract

(* [has_weak_blocks blks] holds when at least one block in [blks] has
   its [weak] flag set. *)
let has_weak_blocks blks =
  List.exists blks ~f:(fun {weak} -> weak)

(* Shadows the previous [normalize] with a cheap front-end: lists with
   fewer than two blocks, and lists without weak blocks, are returned
   as they are; everything else goes through the full normalization. *)
let normalize blks = match blks with
  | [] | [_] -> blks
  | _ :: _ :: _ ->
    if has_weak_blocks blks
    then normalize blks
    else blks

(* postconditions:
- the first block is the entry block
- the last block is the exit block
*)
let reify {entry; blks} =
if is_null entry then [] else
List.fold blks ~init:(None,[]) ~f:(fun (s,blks) b ->
normalize blks |>
List.fold ~init:(None,[]) ~f:(fun (s,blks) b ->
match make_blk b with
| [] -> assert false
| blk::blks' ->
Expand Down Expand Up @@ -108,7 +168,8 @@ let slot = graph
module IR = struct
include Theory.Empty
let ret = Knowledge.return
let blk ?(keep=true) tid = {name=tid; defs=[]; jmps=[]; keep}
let blk ?(keep=true) tid =
{name=tid; defs=[]; jmps=[]; keep; weak=false}

let def = (fun x -> x.defs), (fun x d -> {x with defs = d})
let jmp = (fun x -> x.jmps), (fun x d -> match x.jmps with
Expand Down Expand Up @@ -148,15 +209,20 @@ module IR = struct
then Jmp.reify ?cnd ~tid ~alt:(Jmp.resolved dst) ()
else Jmp.reify ?cnd ~tid ~dst:(Jmp.resolved dst) ()

(* [is_subinstruction label] evaluates to [true] when the
   [Insn.Seqnum.slot] property of [label] holds a value. *)
let is_subinstruction label =
  let+ seqnum = KB.collect Insn.Seqnum.slot label in
  Option.is_some seqnum

let relink label {entry; blks} =
let* weak = is_subinstruction label in
if is_null entry then KB.return {
entry = label;
blks = [{name=label; keep=true; defs=[]; jmps=[]}]
blks = [{name=label; keep=true; weak; defs=[]; jmps=[]}]
} else
let+ blks = List.fold_map blks ~init:`Unbound ~f:(fun r blk ->
if Theory.Label.equal blk.name entry
then if blk.keep then `Relink blk.name, blk
else `Relinked, {blk with name = label; keep=true}
else `Relinked, {blk with name = label; keep=true; weak}
else r,blk) |> function
| `Relinked,blks -> KB.return blks
| `Relink dst, blks ->
Expand All @@ -175,6 +241,7 @@ module IR = struct
blks = [{
name=entry;
keep=false;
weak=false;
jmps=[];
defs=[Def.reify ~tid v x]
}]
Expand Down Expand Up @@ -211,6 +278,7 @@ module IR = struct
blks = [{
name = head;
keep = true;
weak = false;
defs = [];
jmps = [goto ~cnd ~tid head]}]}
| {entry=loop; blks=b::blks} ->
Expand Down
5 changes: 3 additions & 2 deletions plugins/ghidra/semantics/pcode.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@
(set# td dst (load-bits td ptr)))

(defun branch (typ dst)
(if (is-symbol typ) (exec-addr dst)
(goto-subinstruction dst)))
(if (is-symbol typ)
(exec-addr dst)
(goto-subinstruction dst)))

;; The BRANCH operation: forwards TYP and DST unchanged to `branch'.
(defun BRANCH (typ dst)
(branch typ dst))
Expand Down

0 comments on commit f8f63cf

Please sign in to comment.