1#!/usr/bin/env python3
2# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
3"""Convert directories of JSON events to C code."""
4import argparse
5import csv
6from functools import lru_cache
7import json
8import metric
9import os
10import sys
11from typing import (Callable, Dict, Optional, Sequence, Set, Tuple)
12import collections
13
# Global command line arguments. None here; the code in this file reads
# _args.output_file, _args.starting_dir and _args.arch, so it is presumably
# populated by the entry point before any table is printed (TODO confirm).
_args = None
# List of regular event tables (names of tables not ending in '_sys').
_event_tables = []
# List of event tables generated from "/sys" directories (names ending
# in '_sys').
_sys_event_tables = []
# List of regular metric tables (names of tables not ending in '_sys').
_metric_tables = []
# List of metric tables generated from "/sys" directories (names ending
# in '_sys').
_sys_metric_tables = []
# Mapping between sys event table names and sys metric table names.
_sys_event_table_to_metric_table_mapping = {}
# Map from a lower-cased event (or metric) name to an architecture
# standard JsonEvent. Architecture standard events are in json files in
# the top f'{_args.starting_dir}/{_args.arch}' directory.
_arch_std_events = {}
# Events to write out when the table is closed.
_pending_events = []
# Name of the events table to be written out.
_pending_events_tblname = None
# Metrics to write out when the table is closed.
_pending_metrics = []
# Name of the metrics table to be written out.
_pending_metrics_tblname = None
# Global BigCString shared by all structures.
_bcs = None
# Map from the name of a metric group to a description of the group. Both
# the key and the value carry a trailing literal '\000' terminator.
_metricgroups = {}
# Order specific JsonEvent attributes will be visited.
_json_event_attributes = [
    # cmp_sevent related attributes.
    'name', 'topic', 'desc',
    # Seems useful, put it early.
    'event',
    # Short things in alphabetical order.
    'compat', 'deprecated', 'perpkg', 'unit',
    # Longer things (the last won't be iterated over during decompress).
    'long_desc'
]

# Attributes that are in pmu_metric rather than pmu_event.
_json_metric_attributes = [
    'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
    'desc', 'long_desc', 'unit', 'compat', 'metricgroup_no_group',
    'default_metricgroup_name', 'aggr_mode', 'event_grouping'
]
# Attributes that are bools or enum int values, encoded as '0', '1',...
# These are emitted without a '\000' terminator when building the C string.
_json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg']
62
def removesuffix(s: str, suffix: str) -> str:
  """Remove the suffix from a string

  The removesuffix function is added to str in Python 3.9. We aim for 3.6
  compatibility and so provide our own function here.

  Args:
    s: the string to strip.
    suffix: the trailing text to remove.

  Returns:
    s without the trailing suffix, or s unchanged when it does not end
    with suffix or when suffix is empty.
  """
  # An empty suffix needs the explicit check: every string ends with ''
  # and s[0:-0] is s[0:0], which would wrongly yield the empty string.
  if suffix and s.endswith(suffix):
    return s[:-len(suffix)]
  return s
70
71
def file_name_to_table_name(prefix: str, parents: Sequence[str],
                            dirname: str) -> str:
  """Build the C table name for a directory.

  The prefix, each parent directory name and dirname are joined with
  underscores, then every '-' is replaced by '_'.
  """
  components = [prefix, *parents, dirname]
  return '_'.join(components).replace('-', '_')
80
81
def c_len(s: str) -> int:
  """Return the length s would have as a C string.

  Escape characters are only approximated. Every backslash in the UTF-8
  encoding is first assumed to introduce a one character escape, which
  over counts for an escaped backslash (\\\\), so those are added back.
  The generator terminates strings with \\000 rather than \\0 because an
  adjacent digit would be folded into the escape (ie. "\\0" + "5" is the
  escape \\05, not a terminator followed by the number 5); each \\000 is
  therefore adjusted for. Other octal, hex or unicode escapes are not
  counted properly.
  """
  try:
    encoded = s.encode(encoding='utf-8', errors='strict')
  except BaseException:
    # Report the offending string, then let the original error propagate.
    print(f'broken string {s}')
    raise
  backslashes = encoded.count(b'\\')
  escaped_backslashes = encoded.count(b'\\\\')
  terminators = encoded.count(b'\\000')
  return len(encoded) - backslashes + escaped_backslashes - (terminators * 2)
99
class BigCString:
  """A class to hold many strings concatenated together.

  Generating a large number of stand-alone C strings creates a large
  number of relocations in position independent code. The BigCString
  is a helper for this case. It builds a single string which within it
  are all the other C strings (to avoid memory issues the string
  itself is held as a list of strings). The offsets within the big
  string are recorded and when stored to disk these don't need
  relocation. To reduce the size of the string further, identical
  strings are merged. If a longer string ends-with the same value as a
  shorter string, these entries are also merged.

  Usage: call add() for every string, then compute() once; afterwards
  big_string holds the pieces of the C source text and offsets maps each
  added string to its offset. big_string and offsets only exist after
  compute() has run.
  """
  # Every distinct string that was added.
  strings: Set[str]
  # Pieces of generated C source (literals and comments), set by compute().
  big_string: Sequence[str]
  # Offset of each added string within the big C string, set by compute().
  offsets: Dict[str, int]
  # Number of distinct strings added so far.
  insert_number: int
  # Order in which each distinct string was first added.
  insert_point: Dict[str, int]
  # Strings whose first add() call had metric=True.
  metrics: Set[str]

  def __init__(self):
    self.strings = set()
    self.insert_number = 0
    self.insert_point = {}
    self.metrics = set()

  def add(self, s: str, metric: bool) -> None:
    """Called to add to the big string.

    Args:
      s: the string to record; repeated additions of an equal string are
        ignored.
      metric: whether the string belongs to a metric. Only the value
        given the first time s is added is remembered.
    """
    if s not in self.strings:
      self.strings.add(s)
      self.insert_point[s] = self.insert_number
      self.insert_number += 1
      if metric:
        self.metrics.add(s)

  def compute(self) -> None:
    """Called once all strings are added to compute the string and offsets."""

    folded_strings = {}
    # Determine if two strings can be folded, ie. let 1 string use the
    # end of another. First reverse all strings and sort them.
    sorted_reversed_strings = sorted([x[::-1] for x in self.strings])

    # Strings 'xyz' and 'yz' will now be [ 'zy', 'zyx' ]. Scan forward
    # for each string to see if there is a better candidate to fold it
    # into, in the example rather than using 'yz' we can use 'xyz' at
    # an offset of 1. We record which string can be folded into which
    # in folded_strings, we don't need to record the offset as it is
    # trivially computed from the string lengths.
    for pos, s in enumerate(sorted_reversed_strings):
      best_pos = pos
      for check_pos in range(pos + 1, len(sorted_reversed_strings)):
        if sorted_reversed_strings[check_pos].startswith(s):
          best_pos = check_pos
        else:
          # Sorted order keeps all strings sharing a prefix adjacent, so
          # the first mismatch ends the candidates.
          break
      if pos != best_pos:
        folded_strings[s[::-1]] = sorted_reversed_strings[best_pos][::-1]

    # Compute reverse mappings for debugging.
    fold_into_strings = collections.defaultdict(set)
    for key, val in folded_strings.items():
      if key != val:
        fold_into_strings[val].add(key)

    # big_string_offset is the current location within the C string
    # being appended to - comments, etc. don't count. big_string is
    # the string contents represented as a list. Strings are immutable
    # in Python and so appending to one causes memory issues, while
    # lists are mutable.
    big_string_offset = 0
    self.big_string = []
    self.offsets = {}

    def string_cmp_key(s: str) -> Tuple[bool, int, str]:
      # Non-metric strings first, then by order of insertion.
      return (s in self.metrics, self.insert_point[s], s)

    # Emit all strings that aren't folded in a sorted manner.
    for s in sorted(self.strings, key=string_cmp_key):
      if s in folded_strings:
        continue
      self.offsets[s] = big_string_offset
      self.big_string.append(f'/* offset={big_string_offset} */ "')
      self.big_string.append(s)
      self.big_string.append('"')
      if s in fold_into_strings:
        # Sort the set so the generated comment does not depend on hash
        # ordering and the output is reproducible between runs.
        self.big_string.append(
            ' /* also: ' + ', '.join(sorted(fold_into_strings[s])) + ' */')
      self.big_string.append('\n')
      big_string_offset += c_len(s)

    # Compute the offsets of the folded strings: a folded string starts
    # where the tail of the string it was folded into begins.
    for s, folded_s in folded_strings.items():
      assert s not in self.offsets
      self.offsets[s] = self.offsets[folded_s] + c_len(folded_s) - c_len(s)
195
# Create the global BigCString instance; functions in this file add strings
# to it and read their offsets back from it.
_bcs = BigCString()
197
class JsonEvent:
  """Representation of an event loaded from a json file dictionary.

  The constructor translates the JSON keys (EventName, EventCode, UMask,
  MetricExpr, ...) into attributes named after the fields listed in
  _json_event_attributes and _json_metric_attributes, plus pmu. The same
  class is used for events (name is set) and metrics (metric_name is set).
  """

  def __init__(self, jd: dict):
    """Constructor passed the dictionary of parsed json values.

    Args:
      jd: one JSON object, as a dict of string keys to string values.

    Raises:
      argparse.ArgumentTypeError: if 'ArchStdEvent' names an event that is
        not in _arch_std_events.
      KeyError: for an AggregationMode, MetricConstraint or MSRIndex value
        that has no known mapping.
    """

    def llx(x: int) -> str:
      """Convert an int to a string similar to a printf modifier of %#llx."""
      return str(x) if 0 <= x < 10 else hex(x)

    def fixdesc(s: Optional[str]) -> Optional[str]:
      """Fix formatting issue for the desc string.

      Strips one trailing '.  ', '. ' and '.' (in that order) and escapes
      newlines, double quotes and carriage returns for use in C source.
      """
      if s is None:
        return None
      for suffix in ('.  ', '. ', '.'):
        s = removesuffix(s, suffix)
      return s.replace('\n', '\\n').replace('"', '\\"').replace('\r', '\\r')

    def convert_aggr_mode(aggr_mode: str) -> Optional[str]:
      """Returns the aggr_mode_class enum value associated with the JSON string."""
      if not aggr_mode:
        return None
      aggr_mode_to_enum = {
          'PerChip': '1',
          'PerCore': '2',
      }
      return aggr_mode_to_enum[aggr_mode]

    def convert_metric_constraint(metric_constraint: str) -> Optional[str]:
      """Returns the metric_event_groups enum value associated with the JSON string."""
      if not metric_constraint:
        return None
      metric_constraint_to_enum = {
          'NO_GROUP_EVENTS': '1',
          'NO_GROUP_EVENTS_NMI': '2',
          'NO_NMI_WATCHDOG': '2',
          'NO_GROUP_EVENTS_SMT': '3',
      }
      return metric_constraint_to_enum[metric_constraint]

    def lookup_msr(num: str) -> Optional[str]:
      """Converts the msr number, or first in a list to the appropriate event field."""
      if not num:
        return None
      msrmap = {
          0x3F6: 'ldlat=',
          0x1A6: 'offcore_rsp=',
          0x1A7: 'offcore_rsp=',
          0x3F7: 'frontend=',
      }
      return msrmap[int(num.split(',', 1)[0], 0)]

    def real_event(name: str, event: str) -> Optional[str]:
      """Convert well known event names to an event string otherwise use the event argument."""
      fixed = {
          'inst_retired.any': 'event=0xc0,period=2000003',
          'inst_retired.any_p': 'event=0xc0,period=2000003',
          'cpu_clk_unhalted.ref': 'event=0x0,umask=0x03,period=2000003',
          'cpu_clk_unhalted.thread': 'event=0x3c,period=2000003',
          'cpu_clk_unhalted.core': 'event=0x3c,period=2000003',
          'cpu_clk_unhalted.thread_any': 'event=0x3c,any=1,period=2000003',
      }
      if not name:
        return None
      if name.lower() in fixed:
        return fixed[name.lower()]
      return event

    def unit_to_pmu(unit: str) -> Optional[str]:
      """Convert a JSON Unit to Linux PMU name."""
      if not unit:
        return 'default_core'
      # Comment brought over from jevents.c:
      # it's not realistic to keep adding these, we need something more scalable ...
      table = {
          'CBO': 'uncore_cbox',
          'QPI LL': 'uncore_qpi',
          'SBO': 'uncore_sbox',
          'iMPH-U': 'uncore_arb',
          'CPU-M-CF': 'cpum_cf',
          'CPU-M-SF': 'cpum_sf',
          'PAI-CRYPTO' : 'pai_crypto',
          'PAI-EXT' : 'pai_ext',
          'UPI LL': 'uncore_upi',
          'hisi_sicl,cpa': 'hisi_sicl,cpa',
          'hisi_sccl,ddrc': 'hisi_sccl,ddrc',
          'hisi_sccl,hha': 'hisi_sccl,hha',
          'hisi_sccl,l3c': 'hisi_sccl,l3c',
          'imx8_ddr': 'imx8_ddr',
          'L3PMC': 'amd_l3',
          'DFPMC': 'amd_df',
          'UMCPMC': 'amd_umc',
          'cpu_core': 'cpu_core',
          'cpu_atom': 'cpu_atom',
          'ali_drw': 'ali_drw',
          'arm_cmn': 'arm_cmn',
      }
      return table[unit] if unit in table else f'uncore_{unit.lower()}'

    def is_zero(val: str) -> bool:
      """Return True if val parses (as hex with 0x prefix, else decimal) to 0."""
      try:
        if val.startswith('0x'):
          return int(val, 16) == 0
        return int(val) == 0
      except ValueError:
        # Not a number, so it cannot be zero.
        return False

    def canonicalize_value(val: str) -> str:
      """Normalize a numeric string; non-numeric values are returned as is."""
      try:
        if val.startswith('0x'):
          return llx(int(val, 16))
        return str(int(val))
      except ValueError:
        return val

    eventcode = 0
    if 'EventCode' in jd:
      # Only the first entry of a comma separated list is used.
      eventcode = int(jd['EventCode'].split(',', 1)[0], 0)
    if 'ExtSel' in jd:
      eventcode |= int(jd['ExtSel']) << 8
    configcode = int(jd['ConfigCode'], 0) if 'ConfigCode' in jd else None
    eventidcode = int(jd['EventidCode'], 0) if 'EventidCode' in jd else None
    self.name = jd['EventName'].lower() if 'EventName' in jd else None
    self.topic = ''
    self.compat = jd.get('Compat')
    self.desc = fixdesc(jd.get('BriefDescription'))
    self.long_desc = fixdesc(jd.get('PublicDescription'))
    precise = jd.get('PEBS')
    msr = lookup_msr(jd.get('MSRIndex'))
    msrval = jd.get('MSRValue')
    extra_desc = ''
    if 'Data_LA' in jd:
      extra_desc += '  Supports address when precise'
      if 'Errata' in jd:
        extra_desc += '.'
    if 'Errata' in jd:
      extra_desc += '  Spec update: ' + jd['Errata']
    self.pmu = unit_to_pmu(jd.get('Unit'))
    event_filter = jd.get('Filter')
    self.unit = jd.get('ScaleUnit')
    self.perpkg = jd.get('PerPkg')
    self.aggr_mode = convert_aggr_mode(jd.get('AggregationMode'))
    self.deprecated = jd.get('Deprecated')
    self.metric_name = jd.get('MetricName')
    self.metric_group = jd.get('MetricGroup')
    self.metricgroup_no_group = jd.get('MetricgroupNoGroup')
    self.default_metricgroup_name = jd.get('DefaultMetricgroupName')
    self.event_grouping = convert_metric_constraint(jd.get('MetricConstraint'))
    self.metric_expr = None
    if 'MetricExpr' in jd:
      self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify()
    # Note, the metric formula for the threshold isn't parsed as the &
    # and > have incorrect precedence.
    self.metric_threshold = jd.get('MetricThreshold')

    arch_std = jd.get('ArchStdEvent')
    if precise and self.desc and '(Precise Event)' not in self.desc:
      extra_desc += ' (Must be precise)' if precise == '2' else ' (Precise event)'
    # ConfigCode takes priority over EventidCode, which takes priority
    # over the (possibly ExtSel adjusted) EventCode.
    if configcode is not None:
      event = f'config={llx(configcode)}'
    elif eventidcode is not None:
      event = f'eventid={llx(eventidcode)}'
    else:
      event = f'event={llx(eventcode)}'
    # JSON key to event term; terms are appended in this order and zero
    # values are omitted.
    event_fields = [
        ('AnyThread', 'any='),
        ('PortMask', 'ch_mask='),
        ('CounterMask', 'cmask='),
        ('EdgeDetect', 'edge='),
        ('FCMask', 'fc_mask='),
        ('Invert', 'inv='),
        ('SampleAfterValue', 'period='),
        ('UMask', 'umask='),
        ('NodeType', 'type='),
        ('RdWrMask', 'rdwrmask='),
        ('EnAllCores', 'enallcores='),
        ('EnAllSlices', 'enallslices='),
        ('SliceId', 'sliceid='),
        ('ThreadMask', 'threadmask='),
    ]
    for key, value in event_fields:
      if key in jd and not is_zero(jd[key]):
        event += f',{value}{canonicalize_value(jd[key])}'
    if event_filter:
      event += f',{event_filter}'
    if msr:
      event += f',{msr}{msrval}'
    if self.desc and extra_desc:
      self.desc += extra_desc
    if self.long_desc and extra_desc:
      self.long_desc += extra_desc
    if arch_std:
      arch_std_key = arch_std.lower()
      if arch_std_key not in _arch_std_events:
        raise argparse.ArgumentTypeError(
            f'Cannot find arch std event: {arch_std}')
      std_event = _arch_std_events[arch_std_key]
      event = std_event.event
      # Copy from the architecture standard event to self for undefined fields.
      for attr, value in std_event.__dict__.items():
        if hasattr(self, attr) and not getattr(self, attr):
          setattr(self, attr, value)

    self.event = real_event(self.name, event)

  def __repr__(self) -> str:
    """String representation primarily for debugging."""
    s = '{\n'
    for attr, value in self.__dict__.items():
      if value:
        s += f'\t{attr} = {value},\n'
    return s + '}'

  def build_c_string(self, metric: bool) -> str:
    """Concatenate the event's or metric's attributes into one string.

    Args:
      metric: when True the attributes in _json_metric_attributes are
        used, otherwise those in _json_event_attributes.

    Returns:
      The attribute values in list order. Attributes in
      _json_enum_attributes are emitted as their value (or '0' when
      unset) with no terminator; all other attributes are followed by the
      literal text \\000, which is all that is emitted when they are unset.
    """
    s = ''
    for attr in _json_metric_attributes if metric else _json_event_attributes:
      x = getattr(self, attr)
      if metric and x and attr == 'metric_expr':
        # Convert parsed metric expressions into a string. Slashes
        # must be doubled in the file.
        x = x.ToPerfJson().replace('\\', '\\\\')
      if metric and x and attr == 'metric_threshold':
        x = x.replace('\\', '\\\\')
      if attr in _json_enum_attributes:
        s += x if x else '0'
      else:
        s += f'{x}\\000' if x else '\\000'
    return s

  def to_c_string(self, metric: bool) -> str:
    """Representation of the event as a C struct initializer.

    The initializer holds the offset recorded in _bcs.offsets for the
    string returned by build_c_string(), followed by that string as a C
    comment. Raises KeyError if the string has no recorded offset.
    """

    s = self.build_c_string(metric)
    return f'{{ {_bcs.offsets[s]} }}, /* {s} */\n'
433
434
@lru_cache(maxsize=None)
def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]:
  """Read json events from the specified file.

  Every JSON object in the file is converted to a JsonEvent, each event's
  topic is set to the topic argument and metric expressions are rewritten
  in terms of other metrics where metric.RewriteMetricsInTermsOfOthers
  reports an update. Results are cached per (path, topic) by lru_cache,
  so repeated calls return the same JsonEvent objects.

  Args:
    path: path of the JSON file to load.
    topic: topic assigned to every event read from the file.

  Returns:
    The events parsed from the file.

  Raises:
    Any exception raised while opening or parsing the file, after the
    path has been printed.
  """
  try:
    # Use a context manager so the file is closed once it has been parsed.
    with open(path) as json_file:
      events = json.load(json_file, object_hook=JsonEvent)
  except BaseException:
    print(f"Exception processing {path}")
    raise
  metrics: list[Tuple[str, str, metric.Expression]] = []
  for event in events:
    event.topic = topic
    # Metrics with a '-' in their name are not offered for rewriting.
    if event.metric_name and '-' not in event.metric_name:
      metrics.append((event.pmu, event.metric_name, event.metric_expr))
  updates = metric.RewriteMetricsInTermsOfOthers(metrics)
  if updates:
    for event in events:
      if event.metric_name in updates:
        # print(f'Updated {event.metric_name} from\n"{event.metric_expr}"\n'
        #       f'to\n"{updates[event.metric_name]}"')
        event.metric_expr = updates[event.metric_name]

  return events
457
def preprocess_arch_std_files(archpath: str) -> None:
  """Load the architecture standard events found directly in archpath.

  Each '.json' file in the directory is read with an empty topic and its
  events are stored in _arch_std_events under their lower-cased event
  name and, when present, their lower-cased metric name.
  """
  global _arch_std_events
  json_files = (entry for entry in os.scandir(archpath)
                if entry.is_file() and entry.name.endswith('.json'))
  for entry in json_files:
    for std_event in read_json_events(entry.path, topic=''):
      if std_event.name:
        _arch_std_events[std_event.name.lower()] = std_event
      if std_event.metric_name:
        _arch_std_events[std_event.metric_name.lower()] = std_event
468
469
def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
  """Queue the contents of a JSON file for the tables being built.

  Entries with a name are appended to _pending_events and entries with a
  metric_name are appended to _pending_metrics; an entry with both is
  appended to both lists.
  """
  loaded = read_json_events(item.path, topic)
  _pending_events.extend(entry for entry in loaded if entry.name)
  _pending_metrics.extend(entry for entry in loaded if entry.metric_name)
477
478
def print_pending_events() -> None:
  """Optionally close events table.

  Does nothing when _pending_events is empty. Otherwise the pending table
  name is recorded in _sys_event_tables (names ending in '_sys') or
  _event_tables, the pending events are sorted and written to
  _args.output_file as one 'compact_pmu_event' array per PMU followed by
  a 'pmu_table_entry' array listing those per-PMU arrays, and
  _pending_events is reset to an empty list.
  """

  def event_cmp_key(j: JsonEvent) -> Tuple[str, str, bool, str, str]:
    def fix_none(s: Optional[str]) -> str:
      return '' if s is None else s

    return (fix_none(j.pmu).replace(',','_'), fix_none(j.name), j.desc is not None,
            fix_none(j.topic), fix_none(j.metric_name))

  # Only _pending_events is rebound here; the other module level names are
  # only read or mutated in place and so need no global declaration.
  global _pending_events
  if not _pending_events:
    return

  if _pending_events_tblname.endswith('_sys'):
    _sys_event_tables.append(_pending_events_tblname)
  else:
    _event_tables.append(_pending_events_tblname)

  first = True
  last_pmu = None
  pmus = set()
  for event in sorted(_pending_events, key=event_cmp_key):
    if event.pmu != last_pmu:
      # A new PMU starts: close the previous array and open the next one.
      if not first:
        _args.output_file.write('};\n')
      # Commas are not valid in a C identifier.
      pmu_name = event.pmu.replace(',', '_')
      _args.output_file.write(
          f'static const struct compact_pmu_event {_pending_events_tblname}_{pmu_name}[] = {{\n')
      first = False
      last_pmu = event.pmu
      pmus.add((event.pmu, pmu_name))

    _args.output_file.write(event.to_c_string(metric=False))
  _pending_events = []

  # Close the last per-PMU array and emit the table of PMUs.
  _args.output_file.write(f"""
}};

const struct pmu_table_entry {_pending_events_tblname}[] = {{
""")
  for (pmu, tbl_pmu) in sorted(pmus):
    pmu_name = f"{pmu}\\000"
    _args.output_file.write(f"""{{
     .entries = {_pending_events_tblname}_{tbl_pmu},
     .num_entries = ARRAY_SIZE({_pending_events_tblname}_{tbl_pmu}),
     .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
}},
""")
  _args.output_file.write('};\n\n')
534
def print_pending_metrics() -> None:
  """Optionally close metrics table.

  Does nothing when _pending_metrics is empty. Otherwise the pending table
  name is recorded in _sys_metric_tables (names ending in '_sys') or
  _metric_tables, the pending metrics are sorted and written to
  _args.output_file as one 'compact_pmu_event' array per PMU followed by
  a 'pmu_table_entry' array listing those per-PMU arrays, and
  _pending_metrics is reset to an empty list.
  """

  def metric_cmp_key(j: JsonEvent) -> Tuple[bool, str, str]:
    def fix_none(s: Optional[str]) -> str:
      return '' if s is None else s

    return (j.desc is not None, fix_none(j.pmu), fix_none(j.metric_name))

  # Only _pending_metrics is rebound here; the other module level names are
  # only read or mutated in place and so need no global declaration.
  global _pending_metrics
  if not _pending_metrics:
    return

  if _pending_metrics_tblname.endswith('_sys'):
    _sys_metric_tables.append(_pending_metrics_tblname)
  else:
    _metric_tables.append(_pending_metrics_tblname)

  first = True
  last_pmu = None
  pmus = set()
  # The loop variable is not called 'metric' so that it does not hide the
  # imported metric module.
  for pending in sorted(_pending_metrics, key=metric_cmp_key):
    if pending.pmu != last_pmu:
      # A new PMU starts: close the previous array and open the next one.
      if not first:
        _args.output_file.write('};\n')
      # Commas are not valid in a C identifier.
      pmu_name = pending.pmu.replace(',', '_')
      _args.output_file.write(
          f'static const struct compact_pmu_event {_pending_metrics_tblname}_{pmu_name}[] = {{\n')
      first = False
      last_pmu = pending.pmu
      pmus.add((pending.pmu, pmu_name))

    _args.output_file.write(pending.to_c_string(metric=True))
  _pending_metrics = []

  # Close the last per-PMU array and emit the table of PMUs.
  _args.output_file.write(f"""
}};

const struct pmu_table_entry {_pending_metrics_tblname}[] = {{
""")
  for (pmu, tbl_pmu) in sorted(pmus):
    pmu_name = f"{pmu}\\000"
    _args.output_file.write(f"""{{
     .entries = {_pending_metrics_tblname}_{tbl_pmu},
     .num_entries = ARRAY_SIZE({_pending_metrics_tblname}_{tbl_pmu}),
     .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
}},
""")
  _args.output_file.write('};\n\n')
589
def get_topic(topic: str) -> str:
  """Derive the topic from a JSON file name.

  Names ending in 'metrics.json' give 'metrics'; any other name has a
  trailing '.json' removed and each '-' replaced by a space.
  """
  if not topic.endswith('metrics.json'):
    return removesuffix(topic, '.json').replace('-', ' ')
  return 'metrics'
594
def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
  """Add the strings of one JSON file to the global BigCString.

  Directories, files in the base directory or more than four levels
  deep, and files without a '.json' extension are ignored. For
  'metricgroups.json' each group name and description is added to _bcs
  and recorded in _metricgroups. For any other JSON file the PMU name and
  the C string of every event and metric are added to _bcs.

  Args:
    parents: names of the directories leading to item.
    item: the directory entry to examine.
  """
  if item.is_dir():
    return

  # base dir or too deep
  level = len(parents)
  if level == 0 or level > 4:
    return

  # Ignore other directories. If the file name does not have a .json
  # extension, ignore it. It could be a readme.txt for instance.
  if not item.is_file() or not item.name.endswith('.json'):
    return

  if item.name == 'metricgroups.json':
    # Use a context manager so the file is closed once it has been parsed.
    with open(item.path) as json_file:
      metricgroup_descriptions = json.load(json_file)
    for mgroup in metricgroup_descriptions:
      assert len(mgroup) > 1, parents
      description = f"{metricgroup_descriptions[mgroup]}\\000"
      mgroup = f"{mgroup}\\000"
      _bcs.add(mgroup, metric=True)
      _bcs.add(description, metric=True)
      _metricgroups[mgroup] = description
    return

  topic = get_topic(item.name)
  for event in read_json_events(item.path, topic):
    pmu_name = f"{event.pmu}\\000"
    if event.name:
      _bcs.add(pmu_name, metric=False)
      _bcs.add(event.build_c_string(metric=False), metric=False)
    if event.metric_name:
      _bcs.add(pmu_name, metric=True)
      _bcs.add(event.build_c_string(metric=True), metric=True)
630
def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
  """Handle one directory entry during the main walk.

  A directory without sub-directories is treated as a model directory:
  the pending event and metric tables are printed and new table names
  are derived from parents and the directory name. Any other entry is
  queued via add_events_table_entries() when it is a '.json' file other
  than 'metricgroups.json' located one to four levels deep.
  """
  global _pending_events_tblname
  global _pending_metrics_tblname

  def is_leaf_dir(path: str) -> bool:
    return not any(child.is_dir() for child in os.scandir(path))

  # model directory, reset topic
  if item.is_dir() and is_leaf_dir(item.path):
    print_pending_events()
    print_pending_metrics()

    _pending_events_tblname = file_name_to_table_name('pmu_events_', parents, item.name)
    _pending_metrics_tblname = file_name_to_table_name('pmu_metrics_', parents, item.name)

    if item.name == 'sys':
      _sys_event_table_to_metric_table_mapping[_pending_events_tblname] = _pending_metrics_tblname
    return

  # base dir or too deep
  depth = len(parents)
  if depth == 0 or depth > 4:
    return

  # Only regular files with a .json extension hold events; a readme.txt
  # for instance is skipped, as is metricgroups.json.
  holds_events = (item.is_file() and item.name.endswith('.json')
                  and item.name != 'metricgroups.json')
  if not holds_events:
    return

  add_events_table_entries(item, get_topic(item.name))
664
665
def print_mapping_table(archs: Sequence[str]) -> None:
  """Write the C table mapping cpuid strings to event and metric tables.

  The struct declarations are written first. Then, for every architecture
  other than 'test', the rows of its mapfile.csv are turned into
  pmu_events_map entries; the 'test' architecture gets a fixed entry.
  A zeroed entry terminates the table.
  """
  out = _args.output_file
  out.write("""
/* Struct used to make the PMU event table implementation opaque to callers. */
struct pmu_events_table {
        const struct pmu_table_entry *pmus;
        uint32_t num_pmus;
};

/* Struct used to make the PMU metric table implementation opaque to callers. */
struct pmu_metrics_table {
        const struct pmu_table_entry *pmus;
        uint32_t num_pmus;
};

/*
 * Map a CPU to its table of PMU events. The CPU is identified by the
 * cpuid field, which is an arch-specific identifier for the CPU.
 * The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile
 * must match the get_cpuid_str() in tools/perf/arch/xxx/util/header.c)
 *
 * The  cpuid can contain any character other than the comma.
 */
struct pmu_events_map {
        const char *arch;
        const char *cpuid;
        struct pmu_events_table event_table;
        struct pmu_metrics_table metric_table;
};

/*
 * Global table mapping each known CPU for the architecture to its
 * table of PMU events.
 */
const struct pmu_events_map pmu_events_map[] = {
""")
  for arch in archs:
    if arch == 'test':
      out.write("""{
\t.arch = "testarch",
\t.cpuid = "testcpu",
\t.event_table = {
\t\t.pmus = pmu_events__test_soc_cpu,
\t\t.num_pmus = ARRAY_SIZE(pmu_events__test_soc_cpu),
\t},
\t.metric_table = {
\t\t.pmus = pmu_metrics__test_soc_cpu,
\t\t.num_pmus = ARRAY_SIZE(pmu_metrics__test_soc_cpu),
\t}
},
""")
      continue

    with open(f'{_args.starting_dir}/{arch}/mapfile.csv') as csvfile:
      for row_number, row in enumerate(csv.reader(csvfile)):
        # Skip the first row or any row beginning with #.
        if row_number == 0 or len(row) == 0 or row[0].startswith('#'):
          continue
        model = row[2].replace('/', '_')
        event_tblname = file_name_to_table_name('pmu_events_', [], model)
        has_events = event_tblname in _event_tables
        metric_tblname = file_name_to_table_name('pmu_metrics_', [], model)
        has_metrics = metric_tblname in _metric_tables
        # Models with neither an event nor a metric table get no entry.
        if not has_events and not has_metrics:
          continue
        if has_events:
          event_size = f'ARRAY_SIZE({event_tblname})'
        else:
          event_tblname, event_size = 'NULL', '0'
        if has_metrics:
          metric_size = f'ARRAY_SIZE({metric_tblname})'
        else:
          metric_tblname, metric_size = 'NULL', '0'
        # Backslashes must be doubled inside the C string literal.
        cpuid = row[0].replace('\\', '\\\\')
        out.write(f"""{{
\t.arch = "{arch}",
\t.cpuid = "{cpuid}",
\t.event_table = {{
\t\t.pmus = {event_tblname},
\t\t.num_pmus = {event_size}
\t}},
\t.metric_table = {{
\t\t.pmus = {metric_tblname},
\t\t.num_pmus = {metric_size}
\t}}
}},
""")

  out.write("""{
\t.arch = 0,
\t.cpuid = 0,
\t.event_table = { 0, 0 },
\t.metric_table = { 0, 0 },
}
};
""")
762
763
764def print_system_mapping_table() -> None:
765  """C struct mapping table array for tables from /sys directories."""
766  _args.output_file.write("""
767struct pmu_sys_events {
768\tconst char *name;
769\tstruct pmu_events_table event_table;
770\tstruct pmu_metrics_table metric_table;
771};
772
773static const struct pmu_sys_events pmu_sys_event_tables[] = {
774""")
775  printed_metric_tables = []
776  for tblname in _sys_event_tables:
777    _args.output_file.write(f"""\t{{
778\t\t.event_table = {{
779\t\t\t.pmus = {tblname},
780\t\t\t.num_pmus = ARRAY_SIZE({tblname})
781\t\t}},""")
782    metric_tblname = _sys_event_table_to_metric_table_mapping[tblname]
783    if metric_tblname in _sys_metric_tables:
784      _args.output_file.write(f"""
785\t\t.metric_table = {{
786\t\t\t.pmus = {metric_tblname},
787\t\t\t.num_pmus = ARRAY_SIZE({metric_tblname})
788\t\t}},""")
789      printed_metric_tables.append(metric_tblname)
790    _args.output_file.write(f"""
791\t\t.name = \"{tblname}\",
792\t}},
793""")
794  for tblname in _sys_metric_tables:
795    if tblname in printed_metric_tables:
796      continue
797    _args.output_file.write(f"""\t{{
798\t\t.metric_table = {{
799\t\t\t.pmus = {tblname},
800\t\t\t.num_pmus = ARRAY_SIZE({tblname})
801\t\t}},
802\t\t.name = \"{tblname}\",
803\t}},
804""")
805  _args.output_file.write("""\t{
806\t\t.event_table = { 0, 0 },
807\t\t.metric_table = { 0, 0 },
808\t},
809};
810
811static void decompress_event(int offset, struct pmu_event *pe)
812{
813\tconst char *p = &big_c_string[offset];
814""")
815  for attr in _json_event_attributes:
816    _args.output_file.write(f'\n\tpe->{attr} = ')
817    if attr in _json_enum_attributes:
818      _args.output_file.write("*p - '0';\n")
819    else:
820      _args.output_file.write("(*p == '\\0' ? NULL : p);\n")
821    if attr == _json_event_attributes[-1]:
822      continue
823    if attr in _json_enum_attributes:
824      _args.output_file.write('\tp++;')
825    else:
826      _args.output_file.write('\twhile (*p++);')
827  _args.output_file.write("""}
828
829static void decompress_metric(int offset, struct pmu_metric *pm)
830{
831\tconst char *p = &big_c_string[offset];
832""")
833  for attr in _json_metric_attributes:
834    _args.output_file.write(f'\n\tpm->{attr} = ')
835    if attr in _json_enum_attributes:
836      _args.output_file.write("*p - '0';\n")
837    else:
838      _args.output_file.write("(*p == '\\0' ? NULL : p);\n")
839    if attr == _json_metric_attributes[-1]:
840      continue
841    if attr in _json_enum_attributes:
842      _args.output_file.write('\tp++;')
843    else:
844      _args.output_file.write('\twhile (*p++);')
845  _args.output_file.write("""}
846
847static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table,
848                                                const struct pmu_table_entry *pmu,
849                                                pmu_event_iter_fn fn,
850                                                void *data)
851{
852        int ret;
853        struct pmu_event pe = {
854                .pmu = &big_c_string[pmu->pmu_name.offset],
855        };
856
857        for (uint32_t i = 0; i < pmu->num_entries; i++) {
858                decompress_event(pmu->entries[i].offset, &pe);
859                if (!pe.name)
860                        continue;
861                ret = fn(&pe, table, data);
862                if (ret)
863                        return ret;
864        }
865        return 0;
866 }
867
868static int pmu_events_table__find_event_pmu(const struct pmu_events_table *table,
869                                            const struct pmu_table_entry *pmu,
870                                            const char *name,
871                                            pmu_event_iter_fn fn,
872                                            void *data)
873{
874        struct pmu_event pe = {
875                .pmu = &big_c_string[pmu->pmu_name.offset],
876        };
877        int low = 0, high = pmu->num_entries - 1;
878
879        while (low <= high) {
880                int cmp, mid = (low + high) / 2;
881
882                decompress_event(pmu->entries[mid].offset, &pe);
883
884                if (!pe.name && !name)
885                        goto do_call;
886
887                if (!pe.name && name) {
888                        low = mid + 1;
889                        continue;
890                }
891                if (pe.name && !name) {
892                        high = mid - 1;
893                        continue;
894                }
895
896                cmp = strcasecmp(pe.name, name);
897                if (cmp < 0) {
898                        low = mid + 1;
899                        continue;
900                }
901                if (cmp > 0) {
902                        high = mid - 1;
903                        continue;
904                }
905  do_call:
906                return fn ? fn(&pe, table, data) : 0;
907        }
908        return -1000;
909}
910
911int pmu_events_table__for_each_event(const struct pmu_events_table *table,
912                                    struct perf_pmu *pmu,
913                                    pmu_event_iter_fn fn,
914                                    void *data)
915{
916        for (size_t i = 0; i < table->num_pmus; i++) {
917                const struct pmu_table_entry *table_pmu = &table->pmus[i];
918                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
919                int ret;
920
921                if (pmu && !pmu__name_match(pmu, pmu_name))
922                        continue;
923
924                ret = pmu_events_table__for_each_event_pmu(table, table_pmu, fn, data);
925                if (pmu || ret)
926                        return ret;
927        }
928        return 0;
929}
930
931int pmu_events_table__find_event(const struct pmu_events_table *table,
932                                 struct perf_pmu *pmu,
933                                 const char *name,
934                                 pmu_event_iter_fn fn,
935                                 void *data)
936{
937        for (size_t i = 0; i < table->num_pmus; i++) {
938                const struct pmu_table_entry *table_pmu = &table->pmus[i];
939                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
940                int ret;
941
942                if (!pmu__name_match(pmu, pmu_name))
943                        continue;
944
945                ret = pmu_events_table__find_event_pmu(table, table_pmu, name, fn, data);
946                if (ret != -1000)
947                        return ret;
948        }
949        return -1000;
950}
951
952size_t pmu_events_table__num_events(const struct pmu_events_table *table,
953                                    struct perf_pmu *pmu)
954{
955        size_t count = 0;
956
957        for (size_t i = 0; i < table->num_pmus; i++) {
958                const struct pmu_table_entry *table_pmu = &table->pmus[i];
959                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
960
961                if (pmu__name_match(pmu, pmu_name))
962                        count += table_pmu->num_entries;
963        }
964        return count;
965}
966
967static int pmu_metrics_table__for_each_metric_pmu(const struct pmu_metrics_table *table,
968                                                const struct pmu_table_entry *pmu,
969                                                pmu_metric_iter_fn fn,
970                                                void *data)
971{
972        int ret;
973        struct pmu_metric pm = {
974                .pmu = &big_c_string[pmu->pmu_name.offset],
975        };
976
977        for (uint32_t i = 0; i < pmu->num_entries; i++) {
978                decompress_metric(pmu->entries[i].offset, &pm);
979                if (!pm.metric_expr)
980                        continue;
981                ret = fn(&pm, table, data);
982                if (ret)
983                        return ret;
984        }
985        return 0;
986}
987
988int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
989                                     pmu_metric_iter_fn fn,
990                                     void *data)
991{
992        for (size_t i = 0; i < table->num_pmus; i++) {
993                int ret = pmu_metrics_table__for_each_metric_pmu(table, &table->pmus[i],
994                                                                 fn, data);
995
996                if (ret)
997                        return ret;
998        }
999        return 0;
1000}
1001
1002static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu)
1003{
1004        static struct {
1005                const struct pmu_events_map *map;
1006                struct perf_pmu *pmu;
1007        } last_result;
1008        static struct {
1009                const struct pmu_events_map *map;
1010                char *cpuid;
1011        } last_map_search;
1012        static bool has_last_result, has_last_map_search;
1013        const struct pmu_events_map *map = NULL;
1014        char *cpuid = NULL;
1015        size_t i;
1016
1017        if (has_last_result && last_result.pmu == pmu)
1018                return last_result.map;
1019
1020        cpuid = perf_pmu__getcpuid(pmu);
1021
1022        /*
1023         * On some platforms which uses cpus map, cpuid can be NULL for
1024         * PMUs other than CORE PMUs.
1025         */
1026        if (!cpuid)
1027                goto out_update_last_result;
1028
1029        if (has_last_map_search && !strcmp(last_map_search.cpuid, cpuid)) {
1030                map = last_map_search.map;
1031                free(cpuid);
1032        } else {
1033                i = 0;
1034                for (;;) {
1035                        map = &pmu_events_map[i++];
1036
1037                        if (!map->arch) {
1038                                map = NULL;
1039                                break;
1040                        }
1041
1042                        if (!strcmp_cpuid_str(map->cpuid, cpuid))
1043                                break;
1044               }
1045               free(last_map_search.cpuid);
1046               last_map_search.cpuid = cpuid;
1047               last_map_search.map = map;
1048               has_last_map_search = true;
1049        }
1050out_update_last_result:
1051        last_result.pmu = pmu;
1052        last_result.map = map;
1053        has_last_result = true;
1054        return map;
1055}
1056
1057const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
1058{
1059        const struct pmu_events_map *map = map_for_pmu(pmu);
1060
1061        if (!map)
1062                return NULL;
1063
1064        if (!pmu)
1065                return &map->event_table;
1066
1067        for (size_t i = 0; i < map->event_table.num_pmus; i++) {
1068                const struct pmu_table_entry *table_pmu = &map->event_table.pmus[i];
1069                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
1070
1071                if (pmu__name_match(pmu, pmu_name))
1072                         return &map->event_table;
1073        }
1074        return NULL;
1075}
1076
1077const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu)
1078{
1079        const struct pmu_events_map *map = map_for_pmu(pmu);
1080
1081        if (!map)
1082                return NULL;
1083
1084        if (!pmu)
1085                return &map->metric_table;
1086
1087        for (size_t i = 0; i < map->metric_table.num_pmus; i++) {
1088                const struct pmu_table_entry *table_pmu = &map->metric_table.pmus[i];
1089                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
1090
1091                if (pmu__name_match(pmu, pmu_name))
1092                           return &map->metric_table;
1093        }
1094        return NULL;
1095}
1096
1097const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)
1098{
1099        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1100             tables->arch;
1101             tables++) {
1102                if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
1103                        return &tables->event_table;
1104        }
1105        return NULL;
1106}
1107
1108const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid)
1109{
1110        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1111             tables->arch;
1112             tables++) {
1113                if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
1114                        return &tables->metric_table;
1115        }
1116        return NULL;
1117}
1118
1119int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
1120{
1121        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1122             tables->arch;
1123             tables++) {
1124                int ret = pmu_events_table__for_each_event(&tables->event_table,
1125                                                           /*pmu=*/ NULL, fn, data);
1126
1127                if (ret)
1128                        return ret;
1129        }
1130        return 0;
1131}
1132
1133int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data)
1134{
1135        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1136             tables->arch;
1137             tables++) {
1138                int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
1139
1140                if (ret)
1141                        return ret;
1142        }
1143        return 0;
1144}
1145
1146const struct pmu_events_table *find_sys_events_table(const char *name)
1147{
1148        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
1149             tables->name;
1150             tables++) {
1151                if (!strcmp(tables->name, name))
1152                        return &tables->event_table;
1153        }
1154        return NULL;
1155}
1156
1157int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data)
1158{
1159        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
1160             tables->name;
1161             tables++) {
1162                int ret = pmu_events_table__for_each_event(&tables->event_table,
1163                                                           /*pmu=*/ NULL, fn, data);
1164
1165                if (ret)
1166                        return ret;
1167        }
1168        return 0;
1169}
1170
1171int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data)
1172{
1173        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
1174             tables->name;
1175             tables++) {
1176                int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
1177
1178                if (ret)
1179                        return ret;
1180        }
1181        return 0;
1182}
1183""")
1184
def print_metricgroups() -> None:
  """Emit the metric group description table and its C lookup function.

  One row is written per entry of _metricgroups, in sorted group name order.
  Each row holds the _bcs offsets of the group name and of its description.
  The emitted describe_metricgroup() binary searches that table with
  strcmp(), so the rows must be written in sorted order.
  """
  out = _args.output_file
  offsets = _bcs.offsets
  out.write("""
static const int metricgroups[][2] = {
""")
  # Keys are unique, so sorting the items orders rows by group name alone.
  out.writelines(
      f'\t{{ {offsets[group]}, {offsets[text]} }}, /* {group} => {text} */\n'
      for group, text in sorted(_metricgroups.items()))
  out.write("""
};

const char *describe_metricgroup(const char *group)
{
        int low = 0, high = (int)ARRAY_SIZE(metricgroups) - 1;

        while (low <= high) {
                int mid = (low + high) / 2;
                const char *mgroup = &big_c_string[metricgroups[mid][0]];
                int cmp = strcmp(mgroup, group);

                if (cmp == 0) {
                        return &big_c_string[metricgroups[mid][1]];
                } else if (cmp < 0) {
                        low = mid + 1;
                } else {
                        high = mid - 1;
                }
        }
        return NULL;
}
""")
1217
def main() -> None:
  """Parse the command line, scan the json tree and write the C output.

  The positional arguments are arch, model, starting_dir and an optional
  output_file that falls back to stdout. The parsed namespace is stored in
  the global _args, which the other output routines read.

  Raises:
    IOError: fewer than two directories under starting_dir were selected,
      where a directory is selected if it is named 'test', matches arch, or
      arch is 'all'.
  """
  global _args

  def dir_path(path: str) -> str:
    """Argparse type hook that only accepts an existing directory."""
    if not os.path.isdir(path):
      raise argparse.ArgumentTypeError(f'\'{path}\' is not a valid directory')
    return path

  def ftw(path: str, parents: Sequence[str],
          action: Callable[[Sequence[str], os.DirEntry], None]) -> None:
    """Visit path recursively in name order, like C's file tree walk."""
    models = _args.model.split(',')
    # Models are only compared against directories at the depth of the first
    # listed model, e.g. depth 1 for "arm/cortex-a34".
    model_depth = models[0].count('/')
    for entry in sorted(os.scandir(path), key=lambda e: e.name):
      if (_args.model != 'all' and entry.is_dir() and
          len(parents) == model_depth):
        rel_path = '/'.join([*parents, entry.name])
        # Anything with 'test' in its path is kept regardless of the model.
        if not ('test' in rel_path or rel_path in models):
          continue
      action(parents, entry)
      if entry.is_dir():
        ftw(entry.path, parents + [entry.name], action)

  parser = argparse.ArgumentParser()
  parser.add_argument('arch', help='Architecture name like x86')
  parser.add_argument('model', help='''Select a model such as skylake to
reduce the code size.  Normally set to "all". For architectures like
ARM64 with an implementor/model, the model must include the implementor
such as "arm/cortex-a34".''',
                      default='all')
  parser.add_argument(
      'starting_dir',
      type=dir_path,
      help='Root of tree containing architecture directories containing json files'
  )
  parser.add_argument(
      'output_file',
      type=argparse.FileType('w', encoding='utf-8'),
      nargs='?',
      default=sys.stdout)
  _args = parser.parse_args()
  out = _args.output_file

  out.write("""
#include <pmu-events/pmu-events.h>
#include "util/header.h"
#include "util/pmu.h"
#include <string.h>
#include <stddef.h>

struct compact_pmu_event {
        int offset;
};

struct pmu_table_entry {
        const struct compact_pmu_event *entries;
        uint32_t num_entries;
        struct compact_pmu_event pmu_name;
};

""")
  # The 'test' directory is always selected alongside the requested arch(s).
  archs = sorted(
      entry.name
      for entry in os.scandir(_args.starting_dir)
      if entry.is_dir() and
      (_args.arch == 'all' or entry.name in (_args.arch, 'test')))
  if len(archs) < 2:
    raise IOError(f'Missing architecture directory \'{_args.arch}\'')

  arch_paths = [f'{_args.starting_dir}/{arch}' for arch in archs]

  # First pass: preprocess every architecture before _bcs.compute() runs.
  for arch_path in arch_paths:
    preprocess_arch_std_files(arch_path)
    ftw(arch_path, [], preprocess_one_file)

  _bcs.compute()
  out.write('static const char *const big_c_string =\n')
  out.writelines(_bcs.big_string)
  out.write(';\n\n')

  # Second pass: process the files and write out the pending tables.
  for arch_path in arch_paths:
    ftw(arch_path, [], process_one_file)
    print_pending_events()
    print_pending_metrics()

  print_mapping_table(archs)
  print_system_mapping_table()
  print_metricgroups()
1306
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
  main()
1309