1#!/usr/bin/env python
2#
# This tool is a debug location coverage calculator.
# It parses the output of llvm-dwarfdump --statistics and reports it
# in a more human-readable way.
6#
7
8from __future__ import print_function
9import argparse
10import os
11import sys
12from json import loads
13from math import ceil
14from collections import OrderedDict
15from subprocess import Popen, PIPE
16
# Set up the title, the axis labels and the tick styling shared by all the
# plots. `plt` is the pyplot-like object the plot is being drawn with.
def init_plot(plt):
  plot_title = 'Debug Location Statistics'
  x_axis_label = 'location buckets'
  y_axis_label = 'number of variables in the location buckets'

  plt.title(plot_title, fontweight='bold')
  plt.xlabel(x_axis_label)
  plt.ylabel(y_axis_label)
  # The bucket names are long, so tilt and shrink the x tick labels.
  plt.xticks(rotation=45, fontsize='x-small')
  plt.yticks()
24
# Add the legend and the horizontal grid lines, then write the plot to
# "locstats.png" in the current directory and tell the user about it.
def finish_plot(plt):
  grid_style = dict(color='grey', which='major', axis='y', linestyle='-',
                    linewidth=0.3)

  plt.legend()
  plt.grid(**grid_style)
  plt.savefig('locstats.png')
  print('The plot was saved within "locstats.png".')
31
# Holds the debug location statistics collected for a single file and knows
# how to report them, either as text or as a plot.
class LocationStats:
  # file_name                     - name of the processed file; used to label
  #                                 the plots.
  # variables_total               - total number of source variables, or None.
  # variables_total_locstats      - number of variables processed by the
  #                                 location statistics.
  # variables_with_loc            - number of source variables that have a
  #                                 location, or None.
  # variables_scope_bytes_covered - number of parent scope bytes covered by a
  #                                 location.
  # variables_scope_bytes         - number of parent scope bytes.
  # variables_coverage_map        - ordered mapping from the coverage bucket
  #                                 name to the number of variables in it.
  def __init__(self, file_name, variables_total, variables_total_locstats,
    variables_with_loc, variables_scope_bytes_covered, variables_scope_bytes,
    variables_coverage_map):
    self.file_name = file_name
    self.variables_total = variables_total
    self.variables_total_locstats = variables_total_locstats
    self.variables_with_loc = variables_with_loc
    self.scope_bytes_covered = variables_scope_bytes_covered
    self.scope_bytes = variables_scope_bytes
    self.variables_coverage_map = variables_coverage_map

  # Return `part` as an integer percentage of `total`, or 0 when `total` is
  # zero (or missing) so that empty inputs never divide by zero.
  @staticmethod
  def _percentage(part, total):
    if not total:
      return 0
    # NOTE: ceil() is applied to the scaled numerator and not to the quotient,
    # so int() effectively truncates the result. This is kept on purpose: the
    # reported numbers must not change.
    return int(ceil(part * 100.0) / total)

  # Get the PC ranges coverage as an integer percentage. Returns 0 when no
  # scope bytes were found.
  def get_pc_coverage(self):
    return self._percentage(self.scope_bytes_covered, self.scope_bytes)

  # Pretty print the debug location buckets on the standard output.
  # Returns -1 (after printing a notice) if no scope bytes were found, and 0
  # otherwise.
  def pretty_print(self):
    if self.scope_bytes == 0:
      print ('No scope bytes found.')
      return -1

    pc_ranges_covered = self.get_pc_coverage()
    # The share of the processed variables that falls into each bucket.
    variables_coverage_per_map = {
      cov_bucket: self._percentage(self.variables_coverage_map[cov_bucket],
                                   self.variables_total_locstats)
      for cov_bucket in coverage_buckets()
    }

    print (' =================================================')
    print ('            Debug Location Statistics       ')
    print (' =================================================')
    print ('     cov%           samples         percentage(~)  ')
    print (' -------------------------------------------------')
    for cov_bucket in coverage_buckets():
      print ('   {0:10}     {1:8d}              {2:3d}%'. \
        format(cov_bucket, self.variables_coverage_map[cov_bucket], \
               variables_coverage_per_map[cov_bucket]))
    print (' =================================================')
    print (' -the number of debug variables processed: ' \
      + str(self.variables_total_locstats))
    print (' -PC ranges covered: ' + str(pc_ranges_covered) + '%')

    # Only if we are processing all the variables output the total
    # availability.
    if self.variables_total and self.variables_with_loc:
      total_availability = self._percentage(self.variables_with_loc,
                                            self.variables_total)
      print (' -------------------------------------------------')
      print (' -total availability: ' + str(total_availability) + '%')
    print (' =================================================')

    return 0

  # Draw a bar plot representing the location buckets and save it through
  # finish_plot(). Requires matplotlib.
  def draw_plot(self):
    from matplotlib import pyplot as plt

    # Materialize the dict views: plain lists are accepted by every matplotlib
    # version, both as positional and as keyword arguments.
    bucket_names = list(self.variables_coverage_map.keys())
    bucket_sizes = list(self.variables_coverage_map.values())

    buckets = range(len(bucket_sizes))
    plt.figure(figsize=(12, 8))
    init_plot(plt)
    plt.bar(buckets, bucket_sizes, align='center',
            tick_label=bucket_names,
            label='variables of {}'.format(self.file_name))

    # Place the text box with the coverage info.
    pc_ranges_covered = self.get_pc_coverage()
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    plt.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered),
             transform=plt.gca().transAxes, fontsize=12,
             verticalalignment='top', bbox=props)

    finish_plot(plt)

  # Compare the two LocationStats objects and draw a plot showing
  # the difference: the bars of both objects are placed side by side for
  # every bucket. Requires matplotlib.
  def draw_location_diff(self, locstats_to_compare):
    from matplotlib import pyplot as plt

    pc_ranges_covered = self.get_pc_coverage()
    pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

    bucket_sizes = list(self.variables_coverage_map.values())
    bucket_sizes_to_compare = \
      list(locstats_to_compare.variables_coverage_map.values())

    buckets = range(len(bucket_sizes))
    buckets_to_compare = range(len(bucket_sizes_to_compare))

    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    init_plot(plt)

    comparison_keys = list(coverage_buckets())
    # A positive width grows the bar to the right of the tick and a negative
    # one to the left, which puts the two bars of a bucket next to each other.
    ax.bar(buckets, bucket_sizes, align='edge',
           width=0.4,
           label='variables of {}'.format(self.file_name))
    ax.bar(buckets_to_compare,
           bucket_sizes_to_compare,
           color='r', align='edge', width=-0.4,
           label='variables of {}'.format(locstats_to_compare.file_name))
    ax.set_xticks(range(len(comparison_keys)))
    ax.set_xticklabels(comparison_keys)

    # Place one text box with the coverage info per compared file.
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    plt.text(0.02, 0.88,
             '{} PC ranges covered: {}%'. \
             format(self.file_name, pc_ranges_covered),
             transform=plt.gca().transAxes, fontsize=12,
             verticalalignment='top', bbox=props)
    plt.text(0.02, 0.83,
             '{} PC ranges covered: {}%'. \
             format(locstats_to_compare.file_name,
                    pc_ranges_covered_to_compare),
             transform=plt.gca().transAxes, fontsize=12,
             verticalalignment='top', bbox=props)

    finish_plot(plt)
149
# Generate the names of the location buckets, in ascending coverage order:
# the exact 0% bucket, the open (0%,10%) bucket, the half-open ten percent
# wide buckets from [10%,20%) up to [90%,100%), and the exact 100% bucket.
def coverage_buckets():
  yield '0%'
  yield '(0%,10%)'
  lower_bound = 10
  while lower_bound <= 90:
    upper_bound = lower_bound + 10
    yield '[{0}%,{1}%)'.format(lower_bound, upper_bound)
    lower_bound = upper_bound
  yield '100%'
157
# Run llvm-dwarfdump --statistics on `binary`, parse the JSON it prints and
# create a LocationStats object from it.
#
# opts   - the parsed program options; only_variables, only_formal_parameters
#          and ignore_debug_entry_values select which statistics are read.
# binary - the file to collect the statistics for.
#
# Prints an error and exits with status 1 if llvm-dwarfdump cannot be run, if
# its output is not valid JSON, or if an expected statistic is missing.
def parse_locstats(opts, binary):
  # These stay None unless both local variables and formal parameters are
  # processed.
  variables_total = None
  variables_with_loc = None
  variables_coverage_map = OrderedDict()

  # llvm-dwarfdump is looked up in the directory of this script.
  llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), \
                                    "llvm-dwarfdump")
  # The statistics llvm-dwarfdump option.
  llvm_dwarfdump_stats_opt = "--statistics"

  # Generate the stats with the llvm-dwarfdump.
  try:
    subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary], \
                    stdin=PIPE, stdout=PIPE, stderr=PIPE, \
                    universal_newlines = True)
    cmd_stdout, _ = subproc.communicate()
  except OSError as err:
    # E.g. the llvm-dwarfdump executable is missing or is not executable.
    print ('error: Unable to run llvm-dwarfdump: ' + str(err))
    sys.exit(1)

  # Get the JSON and parse it. Only a decoding failure is handled here, so
  # that e.g. a keyboard interrupt is not reported as invalid statistics.
  try:
    json_parsed = loads(cmd_stdout)
  except ValueError:
    print ('error: No valid llvm-dwarfdump statistics found.')
    sys.exit(1)

  # TODO: Parse the statistics Version from JSON.

  # The statistics of the local variables, of the formal parameters and of
  # both together use the same key layout and differ only by the name they
  # carry: stat_name in the '#...' keys and sum_name in the 'sum_all_...' keys.
  process_all = False
  if opts.only_variables:
    stat_name, sum_name = 'local vars', 'local_vars'
  elif opts.only_formal_parameters:
    stat_name, sum_name = 'params', 'params'
  else:
    stat_name, sum_name = 'variables', 'variables'
    process_all = True

  processed_key = '#{0} processed by location statistics'
  scope_bytes_key = 'sum_all_{0}(#bytes in parent scope)'
  covered_bytes_key = \
    'sum_all_{0}(#bytes in parent scope covered by DW_AT_location)'
  entry_value_bytes_key = \
    'sum_all_{0}(#bytes in parent scope covered by DW_OP_entry_value)'
  if opts.ignore_debug_entry_values:
    bucket_key = '#{0} - entry values with {1} of parent scope covered ' \
                 'by DW_AT_location'
  else:
    bucket_key = '#{0} with {1} of parent scope covered by DW_AT_location'

  try:
    if process_all:
      variables_total = json_parsed['#source variables']
      variables_with_loc = json_parsed['#source variables with location']

    variables_total_locstats = json_parsed[processed_key.format(stat_name)]
    variables_scope_bytes_covered = \
      json_parsed[covered_bytes_key.format(sum_name)]
    variables_scope_bytes = json_parsed[scope_bytes_key.format(sum_name)]

    if opts.ignore_debug_entry_values:
      # Do not count the bytes covered only thanks to the entry values.
      variables_scope_bytes_covered = variables_scope_bytes_covered \
        - json_parsed[entry_value_bytes_key.format(sum_name)]

    for cov_bucket in coverage_buckets():
      variables_coverage_map[cov_bucket] = \
        json_parsed[bucket_key.format(stat_name, cov_bucket)]
  except KeyError as err:
    # The output lacks a statistic this tool relies on.
    print ('error: Missing llvm-dwarfdump statistic: ' + str(err))
    sys.exit(1)

  return LocationStats(binary, variables_total, variables_total_locstats,
                       variables_with_loc, variables_scope_bytes_covered,
                       variables_scope_bytes, variables_coverage_map)
275
# Register the options of the tool on `parser`, then parse the command line
# (sys.argv) and return the resulting namespace.
def parse_program_args(parser):
  # All the options are plain on/off switches that are off by default.
  switches = (
    ('--only-variables',
     'calculate the location statistics only for local variables'),
    ('--only-formal-parameters',
     'calculate the location statistics only for formal parameters'),
    ('--ignore-debug-entry-values',
     'ignore the location statistics on locations with '
     'entry values'),
    ('--draw-plot',
     'show histogram of location buckets generated (requires '
     'matplotlib)'),
    ('--compare',
     'compare the debug location coverage on two files provided, '
     'and draw a plot showing the difference  (requires '
     'matplotlib)'),
  )
  for switch, description in switches:
    parser.add_argument(switch, action='store_true', default=False,
                        help=description)

  # One file is expected normally and two with --compare; the exact count is
  # checked later, here at least one is required.
  parser.add_argument('file_names', nargs='+', type=str, help='file to process')

  return parser.parse_args()
297
# Check that the program inputs are consistent. Returns True if they are;
# otherwise prints what the first problem found is and returns False.
def verify_program_inputs(opts):
  problem = None
  if len(sys.argv) < 2:
    problem = 'error: Too few arguments.'
  elif opts.only_variables and opts.only_formal_parameters:
    problem = 'error: Please use just one --only* option.'
  elif not opts.compare and len(opts.file_names) != 1:
    problem = 'error: Please specify only one file to process.'
  elif opts.compare and len(opts.file_names) != 2:
    problem = 'error: Please specify two files to process.'

  if problem is not None:
    print (problem)
    return False

  # matplotlib is needed only by the options that draw a plot.
  if not (opts.draw_plot or opts.compare):
    return True

  try:
    import matplotlib
  except ImportError:
    print('error: matplotlib not found.')
    return False

  return True
324
# The entry point of the tool: parse and verify the command line, collect the
# location statistics of the first file and then either compare them with the
# ones of the second file, draw them, or pretty print them.
def Main():
  arg_parser = argparse.ArgumentParser()
  options = parse_program_args(arg_parser)

  if not verify_program_inputs(options):
    arg_parser.print_help()
    sys.exit(1)

  stats = parse_locstats(options, options.file_names[0])

  if options.compare:
    # Draw a plot showing the difference in debug location coverage between
    # two files.
    stats_to_compare = parse_locstats(options, options.file_names[1])
    stats.draw_location_diff(stats_to_compare)
    return

  if options.draw_plot:
    # Draw a histogram representing the location buckets.
    stats.draw_plot()
    return

  # Pretty print collected info on the standard output. Having nothing to
  # print is not treated as a failure.
  if stats.pretty_print() == -1:
    sys.exit(0)
350
# Run the tool only when this file is executed as a script, and exit with
# status 0 once Main() returns (the error paths call sys.exit() themselves).
if __name__ == '__main__':
  Main()
  sys.exit(0)
354