1#!/usr/bin/env python3
2#
3# Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
4#
5# SPDX-License-Identifier: GPL-2.0-only
6#
7"""
8Extract information of interest to the seL4 image build process from the
9`platform_gen.yaml` file.
10
11THIS IS NOT A STABLE API.  Use as a script, not a module.
12"""
13
import argparse
import sys
from typing import Any, Dict, List, Optional, Tuple

import yaml
19
# Name this program was invoked as.  It is embedded in diagnostic messages and
# in the comments that mark generated C code.
program_name = sys.argv[0]
21
22
23# You can run the doctests with `python3 -m doctest $THIS_FILE`.
def is_valid(data: Optional[Dict[str, Any]]) -> Tuple[bool, List[str]]:
    """
    Verify that the `data` (which should be obtained from a YAML file using
    `load_data()`) contains a well-formed list of disjoint memory regions
    ordered by increasing addresses.

    Returns a tuple of a `bool` and a list of strings.  The `bool` is `True`
    only if no problems were found.  The list is empty if there were no
    problems, and describes one validation issue per element otherwise.

    Malformed input (a top-level value or a region that is not a mapping, or a
    bound that is not an integer) is reported as a problem rather than raising.

    >>> is_valid(None)
    (False, ['no data in file'])
    >>> is_valid(42)
    (False, ['bad data in file (top level is not a mapping)'])
    >>> is_valid({'devices': [{'end': 9699328, 'start': 9437184}]})
    (False, ['no description of memory in file (no "memory" key)'])
    >>> is_valid({'memory': 1})
    (False, ['bad description of memory in file ("memory" is not a list)'])
    >>> is_valid({'memory': []})
    (False, ['memory described as empty in file (list is zero-length)'])
    >>> is_valid({'memory': [{'end': 1342177280, 'start': 268435456}]})
    (True, [])
    >>> is_valid({'memory': [7]})
    (False, ['region 0 is not a mapping'])
    >>> is_valid({'memory': [{'end': 1342177280}]})
    (False, ['region 0 is missing its start bound'])
    >>> is_valid({'memory': [{'start': 268435456}]})
    (False, ['region 0 is missing its end bound'])
    >>> is_valid({'memory': [{'junk': 'foo'}]})
    (False, ['region 0 is missing its start bound', 'region 0 is missing its end bound'])
    >>> is_valid({'memory': [{'start': 'foo'}]})
    (False, ['region start "foo" is not an integer', 'region 0 is missing its end bound'])
    >>> is_valid({'memory': [{'start': 'foo', 'end': 'bar'}]})
    (False, ['region start "foo" is not an integer', 'region end "bar" is not an integer'])
    >>> is_valid({'memory': [{'start': True, 'end': 4095}]})
    (False, ['region start "True" is not an integer'])
    >>> is_valid({'memory': [{'start': 2048, 'end': 1024}]})
    (False, ['region bounds are not in strictly increasing order (1024 not > 2048)'])
    >>> is_valid({'memory': [{'end': 4095, 'start': 0}, {'end': 65535, 'start': 32768}, {'end': 1342177280, 'start': 268435456}]})
    (True, [])
    >>> is_valid({'memory': [{'end': 4095, 'start': 0}, {'end': 65535, 'start': 32768}, {'end': 1342177280, 'start': 268435456}, {'end': 16384, 'start': 32768}]})
    (False, ['region bounds are not in strictly increasing order (32768 not > 1342177280)', 'region bounds are not in strictly increasing order (16384 not > 1342177280)'])
    """
    # Structural problems with the document as a whole make any per-region
    # checks meaningless, so each of them is reported alone.
    if data is None:
        return (False, ['no data in file'])

    if not isinstance(data, dict):
        return (False, ['bad data in file (top level is not a mapping)'])

    if 'memory' not in data:
        return (False, ['no description of memory in file (no "memory" key)'])

    regions = data['memory']

    if not isinstance(regions, list):
        return (False, ['bad description of memory in file'
                        ' ("memory" is not a list)'])

    if not regions:
        return (False, ['memory described as empty in file'
                        ' (list is zero-length)'])

    problems = []
    # The initialization of last_seen_bound works with the "increasing bounds"
    # comparison below to require that all addresses be nonnegative.
    last_seen_bound = -1

    for (n, region) in enumerate(regions):
        if not isinstance(region, dict):
            problems.append('region {n} is not a mapping'.format(n=n))
            continue

        for name in ('start', 'end'):
            if name not in region:
                problems.append('region {n} is missing its {name} bound'
                                .format(n=n, name=name))
                continue

            value = region[name]

            # `bool` is a subclass of `int`, but `true`/`false` is never a
            # meaningful address, so it is rejected explicitly.
            if isinstance(value, bool) or not isinstance(value, int):
                problems.append('region {name} "{value}" is not an integer'
                                .format(name=name, value=value))
            elif value <= last_seen_bound:
                problems.append('region bounds are not in strictly'
                                ' increasing order ({this} not > {last})'
                                .format(this=value, last=last_seen_bound))
            else:
                # Only accepted bounds advance the high-water mark, so one bad
                # value does not distort the checks that follow it.
                last_seen_bound = value

    return (not problems, problems)
101
102
def report(data=None, c_symbols: Optional[Dict[str, str]] = None,
           use_c=False) -> str:
    """
    Return a (typically multi-line) string with information about memory regions
    described in `data`.  The string is empty if `is_valid()` rejects the data.

    The default string contents are human-readable; if `use_c` is `True`, C
    syntax is emitted instead.  The `c_symbols` dict describes the C symbol
    names to be emitted and is consulted only when `use_c` is `True`; it must
    then supply the keys 'array_symbol', 'array_length_symbol', and
    'structure_tag_symbol' (a `KeyError` is raised otherwise).
    """
    # `is_valid()` returns a (bool, problems) tuple.  A non-empty tuple is
    # always truthy, so the verdict has to be unpacked before it is tested.
    (is_good_data, _) = is_valid(data)

    if not is_good_data:
        return ''

    regions = data['memory']
    n = len(regions)

    if not use_c:
        head = 'number of memory regions: {}\n'.format(n)
        body = '\n'.join(
            'region {r}:\n\tstart: {start}\n\tend: {end}'
            .format(r=r, start=region['start'], end=region['end'])
            for (r, region) in enumerate(regions))

        return head + body

    # Extract C symbol names from the dict for convenience.
    symbols = {} if c_symbols is None else c_symbols
    array = symbols['array_symbol']
    length = symbols['array_length_symbol']
    tag = symbols['structure_tag_symbol']

    # We want to mark generated code with a comment.  For best comprehension
    # (by the reader of the generated code), we want to clearly indicate (1)
    # what generated the code and (2) where the generated section begins and
    # ends.  We also want the two comments to otherwise be as similar as
    # possible to facilitate any desired post-processing, so both are expanded
    # from one template that differs only in its "BEGIN"/"END" tag.
    comment = '/* generated by {pn} {tag} */'

    head = '''{comment_begin}
int {length} = {n};

struct {tag} {{
    size_t start;
    size_t end;
}} {array}[{n}] = {{
'''.format(comment_begin=comment.format(pn=program_name, tag='BEGIN'),
           length=length,
           tag=tag,
           array=array,
           n=n)
    body = '\n'.join(
        '\t{{ .start = {start}, .end = {end} }},'
        .format(start=region['start'], end=region['end'])
        for region in regions)
    tail = '\n}};\n{comment_end}'.format(
        comment_end=comment.format(pn=program_name, tag='END'))

    return head + body + tail
169
170
def load_data(yaml_filename: str) -> Optional[Dict[str, Any]]:
    """
    Call `yaml.safe_load()` (from `pyyaml`) on the contents of `yaml_filename`
    and return what was found there.

    For a platform description this is expected to be a Dict.  The result may
    also be `None`, which `is_valid()` reports as 'no data in file'.

    Exceptions from opening the file or from the YAML parser are not caught
    here.
    """
    # Open in binary mode so that PyYAML performs its own encoding detection
    # instead of the text being decoded with the locale's default encoding,
    # which need not match the file.
    with open(yaml_filename, 'rb') as f:
        return yaml.safe_load(f)
180
181
def _process_operand(yaml_filename: str, c_symbols: Dict[str, str],
                     use_c: bool) -> bool:
    """
    Handle one non-optional command-line argument; called by `main()`.

    Loads and validates `yaml_filename`.  On success the report is printed to
    standard output and `True` is returned.  If the file cannot be read or
    parsed, or fails validation, diagnostics are written to standard error and
    `False` is returned.
    """
    # Set up a prefix for diagnostic messages.  Diagnostics should always
    # identify who is talking (`program_name`) and if operating on a file,
    # should name the file in which trouble is encountered.  Both of these
    # make grep more effective.
    prefix = "{pn}: file \"{fn}\":".format(pn=program_name,
                                           fn=yaml_filename)

    # An unreadable or unparseable file is reported like any other problem so
    # that the remaining operands are still processed.
    try:
        data = load_data(yaml_filename)
    except (OSError, yaml.YAMLError) as e:
        sys.stderr.write("{} {}\n".format(prefix, e))
        return False

    (is_good_data, problems) = is_valid(data)

    if is_good_data:
        print(report(data, c_symbols, use_c=use_c))
        return True

    if len(problems) == 1:
        sys.stderr.write("{} {}\n".format(prefix, problems[0]))
    else:
        sys.stderr.write("{} has multiple problems:\n".format(prefix))

        for p in problems:
            sys.stderr.write('{}\t{}\n'.format(prefix, p))

    return False
209
210
def main() -> int:
    """
    Executable entry point.

    Parses the command line, hands every named YAML file to
    `_process_operand()`, and returns the exit status: 0 if every operand was
    handled without trouble, 1 otherwise.
    """
    help_text = """
Extract information of interest to the seL4 image build process from one or more
files generated by `platform_gen.yaml`.

If a YAML file lacks a description of memory, or fails to parse, a diagnostic is
emitted and an exit status of 1 returned.  Exit status 2 indicates a problem
while attempting to parse arguments.

Note that when `--emit-c-syntax` is specified, C99 designated initialisers are
used in the generated code.  This code can be used directly (e.g., inside a
function body) or in a header file.

An example of usage follows.  Note the symbol names used, including those of the
structure members.  An array of structures is always used, even if there is only
one region and therefore array element.  The length of the array is explicitly
exposed, rather than using values like "NULL, NULL" to mark the end of the list.

```
#include "output_of_this_tool.h"

int main(int argc, char *argv[]) {
    for (int i = 0; i < num_memory_regions; i++) {
        (void) printf("memory region %d: 0x%08lx - 0x%08lx\\n",
                      i, memory_region[i].start, memory_region[i].end);
    }
}
```
"""
    # Each C-symbol option shares its name with the `c_symbols` key it feeds:
    # (key, default identifier, help text).
    symbol_options = (
        ('array_symbol', 'memory_region',
         'desired C identifier for struct array'),
        ('array_length_symbol', 'num_memory_regions',
         'desired C identifier for length of struct array'),
        ('structure_tag_symbol', 'memory_region',
         'desired C identifier for structure tag'),
    )

    cli = argparse.ArgumentParser(
        description=help_text,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument('platform_filename', nargs='+', type=str,
                     help='YAML description of platform parameters')
    cli.add_argument('--emit-c-syntax', action='store_true',
                     help='emit C syntax instead of human-readable output')

    for (key, default, description) in symbol_options:
        cli.add_argument('--' + key, type=str, default=default,
                         help=description)

    options = cli.parse_args()
    c_symbols = {key: getattr(options, key) for (key, _, _) in symbol_options}

    # Build the complete list of outcomes before inspecting it, so that a
    # failing operand does not prevent later ones from being processed.
    outcomes = [
        _process_operand(yaml_filename, c_symbols,
                         use_c=options.emit_c_syntax)
        for yaml_filename in options.platform_filename
    ]

    return 0 if all(outcomes) else 1
273
274
275if __name__ == '__main__':
276    sys.exit(main())
277