REVEN-Axion 2017v1.4.2
ida_basicblock_simple.py

This is an example of an IDA script using the REVEN Python API. It must be run from within IDA!

See the script's documentation below for more information:

1 """
2 Purpose:
3  This script is a proof-of-concept for simple program profiling: each basic
4  block in a program will be colorized depending on its execution frequency;
5  one can also observe basic block frequencies in the output window of IDA.
6 
7 Usage:
8  Use IDA to load the binary, and give the following arguments before
9  executing the script:
10  host = your REVEN server name, and
11  port = your REVEN's project port on this host.
12 
13 Remark:
14  For each basic block obtained from IDA's static analysis, the script counts
15  its occurrence frequency in the trace given by REVEN. The following limits
16  are known:
17  - the name of the binary must be obtained from REVEN (that is usually the
18  case if "dump process" is executed properly), and be case-insensitively
19  identical to the one analyzed in IDA (i.e. we do not rename the binary)
20 
21  - the binary must be mapped at a unique virtual base address in the REVEN's
22  project trace
23 
24  - very poor results for packed (or self-modifying) binaries (due to the
25  limits of IDA's static analysis)
26 """
27 
import os

import idaapi
import idautils
import idc

import reven
33 
34 
def main(host, port):
    """
    Colorize the basic blocks of the binary loaded in IDA according to how
    often each one is executed in a REVEN trace.

    host: name of the REVEN server.
    port: port of the REVEN project on that server.

    Prints the static and runtime base addresses, then colorizes the blocks.
    If the binary cannot be located in the trace (or is mapped at several
    different base addresses), only an explanatory message is printed.
    """
    project = reven.Project(host, port)

    runtime_base_address = get_base_address(project)
    if runtime_base_address is None:
        # Nothing can be relocated without a single, known runtime base address.
        print('cannot find the binary {:s} in the REVEN\'s trace or it is mapped at different base addresses.'.format(
            os.path.basename(idc.GetInputFilePath())))
        return

    # Query IDA once and reuse the value for both the report and the offset.
    static_base_address = idaapi.get_imagebase()

    print('base addresses:')
    print(' static: 0x{:x}'.format(static_base_address))
    print(' runtime: 0x{:x}'.format(runtime_base_address))

    static_bbs = collect_basic_blocks()
    binary_trace = get_binary_trace(project)
    # Difference between where the trace saw the binary and where IDA loaded it.
    offset = runtime_base_address - static_base_address

    bb_freq = calculate_frequency(static_bbs, binary_trace, offset)
    colorize_basic_blocks(static_bbs, bb_freq)

    print('colorizing basic blocks done.')
56 
57 
def get_base_address(reven_project):
    """
    Look for the current IDA's binary name in the REVEN's binary mapping information.

    Returns the base address the binary is mapped at, or None when the binary
    is not found or when it is mapped at more than one distinct base address.
    """
    # The path information is completely irrelevant, so we match against the
    # (lower-cased) binary name only.
    binary_name = os.path.basename(idc.GetInputFilePath()).lower()

    base_address = None
    loaded_binaries = reven_project.binaries()
    for bin_path in loaded_binaries:
        if os.path.basename(bin_path).lower() != binary_name:
            continue

        # REVEN stores all the base addresses this binary is mapped at during the trace.
        # There could be more than one if more than one process uses this binary.
        for address_space in loaded_binaries[bin_path].mappings.values():
            if base_address is None:
                base_address = address_space.base_address
            elif base_address != address_space.base_address:
                # More than one process uses the binary, and at different addresses!
                # We don't have enough information to pick one.
                return None

    return base_address
82 
83 
def get_binary_trace(reven_project):
    """
    Return the result of searching the main trace for points that belong to
    the binary currently loaded in IDA.

    The returned object is iterated by the caller; the original author
    describes it as a generator.
    """
    # Select the main trace by name.
    # NOTE(review): the original author suggests `reven_project.traces()[0]`
    # as a better way to select it -- confirm against the REVEN API.
    trace = reven_project.trace('Execution run')

    # Again, the binary's path is irrelevant, match against its name only.
    binary_name = os.path.basename(idc.GetInputFilePath())

    # See the REVEN documentation for more information, but note that this
    # defaults to a "contains" matching algorithm. It may therefore include
    # more than the binary we want.
    criterion = reven.BinaryCriterion(pattern=binary_name, case_sensitive=False)
    return trace.search_point([criterion])
101 
102 
def calculate_frequency(ida_bbs, reven_trace, relocation_offset):
    """
    Count how many times the first instruction of each static basic block
    shows up in the trace.

    ida_bbs: iterable of objects exposing `startEA` (static start address).
    reven_trace: iterable of trace points exposing `basic_block`, itself an
        iterable of instructions exposing `address`.
    relocation_offset: value added to a static address to obtain the
        corresponding address in the trace.

    Returns a dict mapping each static start address to its hit count.
    """
    # Counters are keyed by relocated (runtime) start addresses so that trace
    # addresses can be looked up directly.
    hits = {}
    for block in ida_bbs:
        hits[block.startEA + relocation_offset] = 0

    # Each trace point is only the head of a sequence. Since we only need the
    # instruction addresses (no memory or cpu state), walk the instructions of
    # the point's basic block through its default iterator.
    for head_point in reven_trace:
        for instruction in head_point.basic_block:
            runtime_addr = instruction.address
            if runtime_addr in hits:
                hits[runtime_addr] += 1

    # Translate the keys back to static addresses for the caller.
    return dict((runtime_addr - relocation_offset, count)
                for runtime_addr, count in hits.items())
118 
119 
def collect_basic_blocks():
    """
    Gather the basic blocks of every function known to IDA.

    Returns a set of the block objects produced by idaapi.FlowChart, leaving
    out blocks whose start and end addresses are identical.
    """
    blocks = set()
    for function_ea in idautils.Functions():
        flowchart = idaapi.FlowChart(idaapi.get_func(function_ea))
        # In some cases IDA's analysis gives incorrect results, e.g. blocks
        # whose startEA and endEA are identical; those are dropped.
        blocks.update(block for block in flowchart if block.startEA != block.endEA)

    return blocks
131 
132 
def color_gradient(bb_freq):
    """
    Map each basic block address to a green color component derived from its
    execution frequency.

    bb_freq: dict mapping a basic block address to its frequency.

    Returns a dict with the same keys and integer values in [0, 0xff]: the
    least frequent block gets 0xff and the most frequent gets 0. When every
    frequency is identical, all blocks get 0xff. An empty input yields an
    empty dict.
    """
    if not bb_freq:
        # No block at all: there is no minimum or maximum to scale against.
        return {}

    min_freq = min(bb_freq.values())
    max_freq = max(bb_freq.values())

    max_color = 0xff
    if max_freq == min_freq:
        # A flat distribution would otherwise divide by zero below.
        return dict((bb_addr, max_color) for bb_addr in bb_freq)

    # Green intensity lost per unit of frequency above the minimum.
    color_step = float(max_color) / (max_freq - min_freq)
    return dict((bb_addr, max_color - int(round((freq - min_freq) * color_step)))
                for bb_addr, freq in bb_freq.items())
154 
155 
def colorize_basic_block(bb, color):
    """
    Apply `color` to every item head of the basic block `bb`.

    bb: object exposing `startEA` and `endEA`; items in [startEA, endEA) are
        colored.
    color: color value handed to idaapi.set_item_color.
    """
    current_ea = bb.startEA
    end_ea = bb.endEA
    # Walk from head to head until the block's end address is reached.
    while current_ea < end_ea:
        idaapi.set_item_color(current_ea, color)
        current_ea = idc.NextHead(current_ea)
162 
163 
def colorize_basic_blocks(static_bbs, bb_freq):
    """
    Colorize every executed basic block and print its frequency.

    static_bbs: iterable of basic block objects exposing `startEA`/`endEA`.
    bb_freq: dict mapping a block's static start address to its frequency.

    Blocks with a frequency of zero (or absent from `bb_freq`) are left
    untouched. Output lines are emitted in increasing address order.
    """
    # Nothing to grade when no frequency was collected at all.
    green_colors = color_gradient(bb_freq) if bb_freq else {}

    bb_dict = dict((bb.startEA, bb) for bb in static_bbs)

    print('{:s}\t{:s}'.format('BAddr', 'Freq'))

    # Sorted so that the frequency table is stable and readable.
    for bb_head_addr in sorted(bb_dict):
        freq = bb_freq.get(bb_head_addr, 0)
        if freq <= 0:
            continue

        # BBGGRR color format: BB = 0xb0, GG = green_colors[bb_head_addr], RR = 0xff
        color = (0xb0 << 16) | (green_colors[bb_head_addr] << 8) | 0xff
        colorize_basic_block(bb_dict[bb_head_addr], color)

        # NOTE(review): the listing this was recovered from lost its
        # indentation; the frequency line is assumed to be printed for
        # executed blocks only -- confirm against the original script.
        print('0x{:x}\t{:d}'.format(bb_head_addr, freq))
178 
179 
if __name__ == '__main__':
    # Ask the user for the REVEN project address as "host:port".
    host_port_str = idc.AskStr('localhost:13370', "REVEN's project address")
    if host_port_str is not None:
        try:
            # Only the parsing is guarded here, so that a ValueError raised
            # later by the analysis is not mistaken for a malformed address.
            host, port_str = host_port_str.split(':')
            port = int(port_str)
        except ValueError:
            print("please give a correct REVEN's project address, e.g. localhost:13370")
        else:
            print("REVEN's project: {}:{}".format(host, port))
            try:
                main(host, port)
            except RuntimeError as e:
                print('{}'.format(e))
            except Exception as e:
                # Report what actually went wrong instead of hiding it.
                print('Unknown error: {!r}'.format(e))