"""
This is an example of an IDA script using the REVEN Python API. It must be run from IDA!

This script is a proof of concept that adds code cross-references for dynamic
control-flow instructions (whose targets cannot be computed by IDA's static
analysis) of a binary in IDA. The cross-references are local, namely callees
that are not in the binary are not counted.

Use IDA to load the binary, and give the following arguments before running the script:
    host = your REVEN server name, and
    port = your REVEN project's port on this host.

The script takes targets from the execution trace logged by REVEN (i.e. dynamic
analysis). It has the following limits:
    - the name of the binary must be obtained from REVEN (that is usually the
      case if "dump process" is executed properly), and be case-insensitively
      identical to the one analyzed in IDA (i.e. we do not rename the binary)
    - the binary must be mapped at a unique virtual base address in the REVEN's trace
    - the added cross-refs are not complete (as an inherent problem of dynamic analysis)
    - self-modifying/overlapping instructions are checked in a very limited way
      (does not assume any coherent result in case of self-modifying/packed code).
"""
38 ida_dynamic_jump_types = [
# Instruction types (compared against `ins.itype` in collect_indirect_jccs) that are
# classified as dynamic calls.
# NOTE(review): the entries and the closing bracket of this list are missing from this
# copy of the file -- restore them from the original script.
45 ida_dynamic_call_types = [
# Instruction types (compared against `ins.itype` in collect_indirect_jccs) that are
# classified as returns.
# NOTE(review): the entries and the closing bracket of this list are missing as well.
51 ida_dynamic_ret_types = [
# Mnemonics checked against the last instruction of a traced basic block in add_xrefs();
# only control transfers using one of these mnemonics are turned into cross-references.
reven_jcc_mnemonics = ['jmp', 'call', 'retn', 'retf']
# NOTE(review): the `def` line of this routine is missing from this copy of the file.
# `host` and `port` are read below without being assigned here, so they are presumably
# its parameters -- confirm the name and signature against the original script.
# Open the REVEN project.
60 project = reven.Project(host, port)
# Base address of the binary as seen in the REVEN trace (get_base_address is described
# as returning None when the binary is not found).
62 runtime_base_address = get_base_address(project)
63 if runtime_base_address
is not None:
64 static_base_address = idaapi.get_imagebase()
66 print 'base addresses:' 67 print ' static: 0x{:x}'.format(idaapi.get_imagebase())
68 print ' runtime: 0x{:x}'.format(runtime_base_address)
# Statically collected candidate instructions and the trace points to match them against.
70 inss = collect_indirect_jccs()
71 trace = get_binary_trace(project)
# Difference between runtime and static base; add_xrefs() subtracts it from trace addresses.
72 offset = runtime_base_address - static_base_address
74 add_xrefs(inss, trace, offset)
# NOTE(review): the second message below reads like the failure branch of the
# `is not None` test above, but the `else:` that would introduce it is not visible in
# this copy -- confirm.
76 print 'adding code xref done.' 78 print 'cannot find the binary {:s} in the REVEN\'s trace or it is mapped at different base addresses.'.format(os.path.basename(idc.GetInputFilePath()))
def get_base_address(reven_project):
    """
    Look for the current IDA's binary name in the REVEN's binary mapping information.

    Names are compared on their lowercased base name, so the directory part and the
    letter case of the paths are ignored.

    :param reven_project: REVEN project object; ``binaries()`` must return a container
        that iterates over binary paths and can be indexed by path, each entry exposing
        a ``mappings`` dictionary whose values carry a ``base_address``.
    :return: the base address the binary is mapped at if the binary is found and all of
        its mappings share one base address, else None.
    """
    binary_name = os.path.basename(idc.GetInputFilePath()).lower()

    loaded_binaries = reven_project.binaries()
    for bin_path in loaded_binaries:
        if binary_name != os.path.basename(bin_path).lower():
            continue

        # NOTE(review): the initialisation of the base address and every `return` were
        # missing from the damaged source; the behaviour below follows the documented
        # contract (a unique base address, or None) -- confirm against the original.
        base_addresses = set(
            address_space.base_address
            for address_space in loaded_binaries[bin_path].mappings.values())
        if len(base_addresses) == 1:
            return base_addresses.pop()

        # No mapping at all, or the binary is mapped at several different bases.
        return None

    # The binary is not known to REVEN.
    return None
def get_binary_trace(reven_project):
    """
    Return a python generator that filters the trace on the current IDA's binary only.

    :param reven_project: REVEN project object providing ``trace(name)``.
    :return: the result of ``search_point`` on the 'Execution run' trace, restricted by
        a case-insensitive ``reven.BinaryCriterion`` on the base name of IDA's input file.
    """
    trace = reven_project.trace('Execution run')

    binary_name = os.path.basename(idc.GetInputFilePath())

    # NOTE(review): the damaged source never returned the search result although the
    # caller iterates over it; the `return` is restored here.
    criterion = reven.BinaryCriterion(pattern=binary_name, case_sensitive=False)
    return trace.search_point([criterion])
def add_xrefs(ida_jc_inss, reven_points, relocation_offset):
    """
    Add code cross-references in IDA for the dynamic control transfers seen in the trace.

    The trace points are walked pairwise. For each pair, the point reached by stepping
    once per instruction of the previous point's basic block is compared with the
    current point; when both sit at the same address and the last instruction of that
    basic block is one of the statically collected candidates, the transfer
    (last instruction -> current instruction) is recorded. One cross-reference is then
    added per recorded transfer and a summary is printed.

    :param ida_jc_inss: candidate instructions decoded by IDA; ``ea`` and ``size`` of
        each one are used.
    :param reven_points: iterable of REVEN trace points; ``instruction``,
        ``basic_block`` and ``next()`` of each one are used.
    :param relocation_offset: value subtracted from trace addresses to obtain the
        addresses used in IDA (the caller passes runtime base minus static base).
    """
    # Bytes of every candidate instruction as stored in the IDA database, by address.
    jcc_dict = {ins.ea: idc.GetManyBytes(ins.ea, ins.size) for ins in ida_jc_inss}

    # NOTE(review): the initialisation of `bb_control_flow` and `prev_point` was missing
    # from the damaged source; both are restored here.
    examined_control_flow = set()
    bb_control_flow = []
    prev_point = None

    for curr_point in reven_points:
        if prev_point is not None:
            # Step once per instruction of the previous basic block; the loop variable
            # ends up holding the last instruction of that block.
            target_point = prev_point
            last_ins_of_prev_bb = None
            for last_ins_of_prev_bb in prev_point.basic_block:
                target_point = target_point.next()

            # An empty basic block has no last instruction to attribute a transfer to.
            if last_ins_of_prev_bb is not None:
                curr_ins = curr_point.instruction
                target_ins = target_point.instruction
                edge = (last_ins_of_prev_bb.address, curr_ins.address)

                if (curr_ins.address == target_ins.address
                        and last_ins_of_prev_bb.mnemonic in reven_jcc_mnemonics
                        and last_ins_of_prev_bb.address - relocation_offset in jcc_dict
                        and edge not in examined_control_flow):
                    examined_control_flow.add(edge)
                    bb_control_flow.append((last_ins_of_prev_bb, curr_ins))

        prev_point = curr_point

    updated_jmp_xrefs = set()
    updated_call_xrefs = set()
    updated_ret_xrefs = set()

    for prev_ins, curr_ins in bb_control_flow:
        mnemonic = prev_ins.mnemonic
        caller_address = prev_ins.address - relocation_offset

        # Always true for recorded transfers (same test as in the first loop); kept as a
        # guard so the dictionary lookup below cannot fail.
        if caller_address not in jcc_dict:
            continue

        if jcc_dict[caller_address] != prev_ins.raw_bytes:
            # NOTE(review): one source line after this warning was lost; the
            # cross-reference is still added here -- confirm against the original.
            print('warning: instruction at 0x{:x} is modified in running or loading time'.format(caller_address))

        target_address = curr_ins.address - relocation_offset

        # Jumps and returns are recorded as jump flows, calls as call flows.
        if mnemonic == 'jmp':
            updated_xrefs, flow_type = updated_jmp_xrefs, idc.fl_JF
        elif mnemonic == 'call':
            updated_xrefs, flow_type = updated_call_xrefs, idc.fl_CF
        else:
            updated_xrefs, flow_type = updated_ret_xrefs, idc.fl_JF

        xref = (caller_address, target_address)
        if xref not in updated_xrefs:
            idc.AddCodeXref(caller_address, target_address, flow_type)
            updated_xrefs.add(xref)

    print('updated xrefs from runtime information:')
    print(' jmp: {:d}'.format(len(updated_jmp_xrefs)))
    print(' call: {:d}'.format(len(updated_call_xrefs)))
    print(' ret: {:d}'.format(len(updated_ret_xrefs)))

    for (caller_address, target_address) in updated_jmp_xrefs:
        print('0x{:x} => 0x{:x} (jmp)'.format(caller_address, target_address))

    for (caller_address, target_address) in updated_call_xrefs:
        print('0x{:x} => 0x{:x} (call)'.format(caller_address, target_address))

    for (caller_address, target_address) in updated_ret_xrefs:
        print('0x{:x} => 0x{:x} (ret)'.format(caller_address, target_address))
def collect_indirect_jccs():
    """
    Collect the instructions of the IDA database whose target is only known at runtime.

    Every code head of every segment is decoded; instructions carrying the
    ``idaapi.CF_JUMP`` canonical feature are sorted by ``itype`` into dynamic jumps,
    dynamic calls and returns. The number of instructions in each class is printed.

    :return: one list holding the collected calls, then the jumps, then the returns.
    """
    # NOTE(review): the initialisation of the three lists was missing from the damaged
    # source; it is restored here.
    indirect_jumps = []
    indirect_calls = []
    indirect_rets = []

    # Checked in order; an instruction goes to the first class that matches its itype.
    # NOTE(review): indentation was lost in the source, so whether the call/ret tests sit
    # under the CF_JUMP test is reconstructed. If returns do not carry CF_JUMP, the ret
    # class stays empty -- verify.
    classes = (
        (ida_dynamic_jump_types, indirect_jumps),
        (ida_dynamic_call_types, indirect_calls),
        (ida_dynamic_ret_types, indirect_rets),
    )

    for seg in idautils.Segments():
        for head in idautils.Heads(idc.SegStart(seg), idc.SegEnd(seg)):
            if not idc.isCode(idc.GetFlags(head)):
                continue

            ins = idautils.DecodeInstruction(head)
            # Decoding can fail and yield None; such heads are skipped.
            if ins is None:
                continue

            if not ins.get_canon_feature() & idaapi.CF_JUMP:
                continue

            for itypes, collected in classes:
                if ins.itype in itypes:
                    if ins not in collected:
                        collected.append(ins)
                    break

    print('statically detected jccs:')
    print(' jmp: {:d}'.format(len(indirect_jumps)))
    print(' call: {:d}'.format(len(indirect_calls)))
    print(' ret: {:d}'.format(len(indirect_rets)))

    return indirect_calls + indirect_jumps + indirect_rets
# Script entry point: ask for the REVEN project address as "host:port" (the prompt is
# pre-filled with 'localhost:13370'), then report problems through the messages below.
# NOTE(review): several lines of this block are missing from this copy of the file: the
# `try:` matching the `except` clause below, the assignment of `port`, the call that
# starts the analysis, and the clause headers in front of the last two messages.
238 if __name__ ==
'__main__':
239 host_port_str = idc.AskStr(
'localhost:13370',
"REVEN's project address")
240 if host_port_str
is not None:
# Exactly one ':' is expected; any other number of parts makes the unpacking fail.
242 host, port_str = host_port_str.split(
':')
# NOTE(review): `port` is formatted here but only `port_str` is assigned in the visible
# lines -- presumably it is derived from `port_str`; confirm.
# `print(...).format(...)` only works as a Python 2 print statement (the parenthesised
# string is formatted first, then printed).
244 print(
"REVEN's project: {}:{}").format(host, port)
# NOTE(review): presumably the handler for a malformed address; its header is not visible.
247 print(
"please give a correct REVEN\'s project address, e.g. localhost:13370")
# Python 2 exception syntax.
248 except RuntimeError, e:
249 print(
'{}').format(e)
# NOTE(review): presumably a catch-all handler; its header is not visible.
251 print(
'Unknown error')