-
Notifications
You must be signed in to change notification settings - Fork 4
/
explain.py
274 lines (237 loc) · 10.8 KB
/
explain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
import traceback
from dataclasses import FrozenInstanceError, dataclass
from binaryninja import (
LowLevelILOperation,
LowLevelILInstruction,
ILFlag,
BackgroundTaskThread,
log_warn,
)
from .explanations import il_explanations as explanations
from .util import *
# LLIL operations whose explanation is left bare when it appears as an operand of another
# instruction. Wrapping these in parentheses would not make the enclosing explanation any
# clearer; RecursiveExplainer parenthesizes every operation that is not listed here.
no_paren = {
    LowLevelILOperation.LLIL_CONST,
    LowLevelILOperation.LLIL_REG,
    LowLevelILOperation.LLIL_CONST_PTR,
    LowLevelILOperation.LLIL_POP,
    LowLevelILOperation.LLIL_FLAG,
}
@dataclass
class RecursiveExplainer:
    """Proxy over an LLIL instruction whose nested operands come back already explained.

    Any attribute that is not found on this object through normal lookup is read from
    ``instruction`` instead (see ``__getattr__``).
    """

    bv: BinaryView
    instruction: LowLevelILInstruction

    def __getattr__(self, item):
        """Read ``item`` from the wrapped instruction, explaining it if it is an instruction.

        Returns the explanation string of a nested ``LowLevelILInstruction`` (wrapped in
        parentheses unless its operation is in ``no_paren``), or the raw attribute value for
        anything else. Raises ``AttributeError`` when the wrapped instruction has no such
        attribute.
        """
        missing = object()
        value = getattr(self.instruction, item, missing)
        if value is missing:
            raise AttributeError(f"{self.instruction} has no attribute '{item}'")
        if not isinstance(value, LowLevelILInstruction):
            # Plain values (registers, integers, ...) are passed through untouched
            return value
        template = "{}" if value.operation in no_paren else "({})"
        return template.format(explain_llil(self.bv, value))
def preprocess_LLIL_CONST(_bv, llil_instruction):
    """Expose the constant as its first rendered token rather than its raw value.

    The rendered token is used because the values are signed.
    Returns a dict with the single key ``constant``.
    """
    rendered = llil_instruction.tokens[0]
    return {"constant": rendered}
def preprocess_LLIL_CONST_PTR(bv, llil_instruction):
    """Name the pointed-to address: a symbol name if one matches, a hex string otherwise.

    The symbols returned by ``bv.get_symbols()`` are scanned in order and the first one whose
    address equals the instruction's constant wins. Returns a dict with the single key
    ``constant``.
    """
    target = llil_instruction.constant
    for symbol in bv.get_symbols():
        if symbol.address == target:
            return {"constant": symbol.name}
    # No symbol lives at this address, so fall back to the hex form of the pointer
    return {"constant": to_hex(target)}
def preprocess_LLIL_FLAG_COND(_bv, llil_instruction):
    """Replace the flag condition with the explanation text stored under the condition's name.

    Returns a dict with the single key ``condition``.
    """
    condition_name = llil_instruction.condition.name
    return {"condition": explanations[condition_name]}
def preprocess_LLIL_GOTO(bv, llil_instruction):
    """Turn the jump target from a lifted IL index into the hex address of that instruction.

    Returns a dict with the single key ``dest``. Raises ``IndexError`` if no lifted
    ``LLIL_GOTO`` is found at the instruction's address.
    """
    func = get_function_at(bv, llil_instruction.address)
    # The lifted IL is consulted because the LLIL ignores comparisons and tests
    gotos = [
        candidate
        for candidate in find_lifted_il(func, llil_instruction.address)
        if candidate.operation == LowLevelILOperation.LLIL_GOTO
    ]
    first_goto = gotos[0]
    target = func.lifted_il[first_goto.dest]
    return {"dest": to_hex(target.address)}
def preprocess_LLIL_IF(bv, llil_instruction):
    """Turn both branch targets from lifted IL indices into hex addresses.

    Returns a dict with the keys ``true`` and ``false``. Raises ``IndexError`` if no lifted
    ``LLIL_IF`` is found at the instruction's address.
    """
    func = get_function_at(bv, llil_instruction.address)
    # The lifted IL is consulted because the LLIL ignores comparisons and tests
    branches = [
        candidate
        for candidate in find_lifted_il(func, llil_instruction.address)
        if candidate.operation == LowLevelILOperation.LLIL_IF
    ]
    first_branch = branches[0]
    lifted = func.lifted_il
    taken = lifted[first_branch.true]
    not_taken = lifted[first_branch.false]
    return {
        "true": to_hex(taken.address),
        "false": to_hex(not_taken.address),
    }
def preprocess_LLIL_FLAG(bv, llil_instruction):
    """Follow back temporary flags and report the address where they are created.

    Returns a dict with two keys:

    * ``source`` -- what the flag's value comes from. Defaults to the flag operand itself.
    * ``address`` -- where that value was produced. Defaults to the instruction's own
      address; the branches below replace it with a hex string or a short description.

    Raises ``IndexError`` if the Phi fallback finds no lifted ``LLIL_IF`` at the
    instruction's address.
    """
    flag = llil_instruction.src
    source = flag
    address = llil_instruction.address
    if flag.temp:
        ssa_flag = llil_instruction.ssa_form.src
        indx = llil_instruction.function.get_ssa_flag_definition(ssa_flag).instr_index
        definition = llil_instruction.function[indx]
        if hasattr(definition, "src"):
            # Make sure that we're actually looking at an instruction that sets something
            # (and not a Phi function)
            source = definition.src
            # Use the address of the defining instruction, as preprocess_LLIL_REG does. The
            # flag operand is presumably an ILFlag with no address of its own -- TODO confirm
            # against the Binary Ninja API.
            address = to_hex(definition.address)
        elif isinstance(flag, ILFlag):
            # Sometimes we have a temporary flag that resolves to a Phi function, which makes
            # it show up at the same address. Rather than try to build a conditional tree from
            # the phi function (potentially impossible?) we default back to the CPU flags.
            lifted_ifs = [
                k
                for k in find_lifted_il(
                    llil_instruction.function.source_function,
                    llil_instruction.address,
                )
                if k.operation == LowLevelILOperation.LLIL_IF
            ]
            source = lifted_ifs[0].condition
            address = "in multiple code paths"
    elif isinstance(flag, ILFlag):
        # On occasion, binja won't know what to do with a CPU flag and will use it "raw"
        # without figuring out what the conditional means. Happens with the direction flag on
        # x86 sometimes.
        role_name = bv.arch.flag_roles[flag.name].name
        source = role_name.replace("Role", "") + " is set"
        address = "unknown"
    return {"source": source, "address": address}
def preprocess_LLIL_REG(_bv, llil_instruction):
    """Follow a temporary register back to its definition and note where that happened.

    Returns a dict with two keys:

    * ``source`` -- the register itself, or the source of the instruction that defined it
      when the register is temporary and that definition has a ``src``.
    * ``location`` -- a suffix describing where the value came from; empty for
      non-temporary registers.
    """
    register = llil_instruction.src
    if not register.temp:
        return {"location": "", "source": register}

    ssa_reg = llil_instruction.ssa_form.src
    func = llil_instruction.function
    definition = func[func.get_ssa_reg_definition(ssa_reg)]
    if not hasattr(definition, "src"):
        # The definition sets nothing directly (e.g. a Phi function), so there is no single
        # source to follow. Stopping here also avoids any chance of infinite recursion.
        return {
            "location": " (value dependent on code path used to reach this instruction)",
            "source": register,
        }

    source = definition.src
    # Say where the value was read, in case the underlying registers have changed since then
    where = " (at instruction {})".format(to_hex(definition.address))
    return {"location": where, "source": source}
# Dispatch table: LLIL operation name -> preprocess function that supplies the extra format
# fields for that operation. preprocess() looks operations up here by `operation.name`;
# operations without an entry contribute no extra fields.
preprocess_dict = {
    "LLIL_IF": preprocess_LLIL_IF,  # Conditional jumps
    "LLIL_GOTO": preprocess_LLIL_GOTO,  # Unconditional jumps
    "LLIL_CONST": preprocess_LLIL_CONST,
    "LLIL_CONST_PTR": preprocess_LLIL_CONST_PTR,  # Seems to refer to a constant in .data - could consider dereferencing these
    "LLIL_FLAG_COND": preprocess_LLIL_FLAG_COND,
    "LLIL_REG": preprocess_LLIL_REG,  # Registers (including temporary)
    # "LLIL_FLAG": preprocess_LLIL_FLAG, # Temporary flags # TODO: Fix flag handling
}
def preprocess(bv, llil_instruction):
    """Build the dict of fields an explanation template is formatted with.

    The dict always holds ``llil`` (a ``RecursiveExplainer`` around the instruction, which
    expands nested LLIL operations) and ``arch`` (``bv.arch``). If ``preprocess_dict`` has a
    function for the instruction's operation name, the fields it returns are merged in and
    override the defaults on key clashes.
    """
    # Any additional information for formatting explanations can be added here
    environment = {
        "llil": RecursiveExplainer(bv, llil_instruction),
        "arch": bv.arch,
    }
    handler = preprocess_dict.get(llil_instruction.operation.name)
    if handler is not None:
        environment.update(handler(bv, llil_instruction))
    return environment
def explain_llil(bv, llil_instruction):
    """Return the explanation template for the instruction, filled in with preprocessed fields.

    Returns ``None`` when ``llil_instruction`` is ``None``. Returns the bare operation name
    when no explanation exists for it (a warning is logged) or when formatting raises
    ``AttributeError`` (the traceback is logged as an error). ``FrozenInstanceError`` is
    re-raised.
    """
    if llil_instruction is None:
        return None

    name = llil_instruction.operation.name
    if name not in explanations:
        # If there's anything in the LLIL that doesn't have an explanation, yell about it
        log_warn(f"Explain Instruction doesn't understand {name} yet")
        return name

    try:
        fields = preprocess(bv, llil_instruction)
        return explanations[name].format(**fields)
    except FrozenInstanceError:
        # Something tried to assign data to the LLIL instruction. Definitely a bug. This has
        # to be listed before AttributeError, which FrozenInstanceError derives from.
        raise
    except AttributeError:
        # Usually a bad format string. Shouldn't show up unless something truly weird happens.
        log_error(traceback.format_exc())
        return name
def fold_multi_il(_bv, llil_list):
    """Return the instructions of ``llil_list`` minus flag sets and temporary-register sets.

    Dropped are every ``LLIL_SET_FLAG`` and every ``LLIL_SET_REG`` whose destination is a
    temporary register; the remaining instructions keep their order.
    """
    # Despite the name, nothing is "folded" yet. Temporary variables could in future be folded
    # into the instructions that use them instead of being looked up by the preprocess
    # functions, but atomic combinations of instructions cannot be represented accurately
    # without temporary variables, so removing them outright could produce inaccurate
    # explanations. Until those cases can be detected (or something like an LLIL_ATOMIC
    # operation exists), this stays a simple filter.
    return [
        llil
        for llil in llil_list
        if not (
            llil.operation == LowLevelILOperation.LLIL_SET_FLAG
            or (llil.operation == LowLevelILOperation.LLIL_SET_REG and llil.dest.temp)
        )
    ]
def make_description(bv, arch_explainer, instruction, lifted_il_list, llil_list):
    """Collect the explanation entries for one instruction.

    The architecture explainer's entries always come first. The generic LLIL-based
    explanations are appended after them unless the architecture explainer reports that its
    own explanation supersedes them.
    """
    # Typically the Low Level IL is parsed. Sometimes there is no corresponding LLIL
    # instruction (like for cmp); then the lifted IL, which is closer to the raw assembly,
    # is used instead.
    if len(llil_list) > 0:
        il_to_parse = llil_list
    else:
        il_to_parse = lifted_il_list
    parse_il = fold_multi_il(bv, il_to_parse)

    # Give the architecture submodule a chance to supply an explanation that takes precedence
    # over the one generated via the LLIL
    should_supersede, explanation_list = arch_explainer.explain_instruction(
        instruction, lifted_il_list
    )

    generic = []
    if not should_supersede:
        generic = [explain_llil(bv, llil) for llil in parse_il]
    return explanation_list + generic
class ThreadExplainer(BackgroundTaskThread):
    """Cancellable background task that builds an instruction's descriptions.

    ``run`` stores the result of ``make_description`` in ``descriptions``; ``finish`` passes
    that list to ``final_callback``. After ``cancel`` the callback is a no-op.
    """

    def __init__(
        self, bv, arch_explainer, instruction, lifted_il_list, llil_list, final_callback
    ):
        """Store the inputs for ``make_description`` and the callback to receive its result."""
        title = f"Generating Explanation for {fmt_instruction(instruction)}..."
        super().__init__(title, can_cancel=True)
        self.bv = bv
        self.arch_explainer = arch_explainer
        self.instruction = instruction
        self.lifted_il_list = lifted_il_list
        self.llil_list = llil_list
        self.final_callback = final_callback
        self.descriptions = []

    def run(self):
        """Generate the descriptions and keep them for ``finish``."""
        self.descriptions = make_description(
            bv=self.bv,
            arch_explainer=self.arch_explainer,
            instruction=self.instruction,
            lifted_il_list=self.lifted_il_list,
            llil_list=self.llil_list,
        )

    def cancel(self):
        """Cancel the task, making sure the original callback is never invoked."""

        def discard(*_):
            return None

        # Swap the callback out before cancelling so a later finish() reports to nobody
        self.final_callback = discard
        super().cancel()

    def finish(self):
        """Finish the task, then hand the descriptions to the callback."""
        super().finish()
        self.final_callback(self.descriptions)