#!/usr/bin/env python3
#
# Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
#
# SPDX-License-Identifier: GPL-2.0-only
#
"""
Extract information of interest to the seL4 image build process from the
`platform_gen.yaml` file.

THIS IS NOT A STABLE API. Use as a script, not a module.
"""

import argparse
import sys
import yaml

from typing import Any, Dict, List, Optional, Tuple

program_name = sys.argv[0]

# C identifiers used when the caller does not supply (all of) its own.  These
# are also the defaults of the corresponding command-line options in `main()`.
_DEFAULT_C_SYMBOLS = {
    'array_symbol': 'memory_region',
    'array_length_symbol': 'num_memory_regions',
    'structure_tag_symbol': 'memory_region',
}


# You can run the doctests with `python3 -m doctest $THIS_FILE`.
def is_valid(data: Optional[Dict[str, Any]]) -> Tuple[bool, List[str]]:
    """
    Verify that the `data` (which should be obtained from a YAML file using
    `load_data()`) contains a well-formed list of disjoint memory regions
    ordered by increasing addresses.

    Returns a tuple of a `bool` and a list of strings.  The list is empty if
    there were no problems, and describes one validation issue per element
    otherwise.

    Note that the returned tuple is always truthy; callers must inspect its
    first element rather than testing the tuple itself.

    >>> is_valid(None)
    (False, ['no data in file'])
    >>> is_valid(42)
    (False, ['no description of memory in file (no "memory" key)'])
    >>> is_valid({'devices': [{'end': 9699328, 'start': 9437184}]})
    (False, ['no description of memory in file (no "memory" key)'])
    >>> is_valid({'memory': 1})
    (False, ['bad description of memory in file ("memory" is not a list)'])
    >>> is_valid({'memory': []})
    (False, ['memory described as empty in file (list is zero-length)'])
    >>> is_valid({'memory': [1]})
    (False, ['region 0 is not a mapping of bounds'])
    >>> is_valid({'memory': [{'end': 1342177280, 'start': 268435456}]})
    (True, [])
    >>> is_valid({'memory': [{'end': 1342177280}]})
    (False, ['region 0 is missing its start bound'])
    >>> is_valid({'memory': [{'start': 268435456}]})
    (False, ['region 0 is missing its end bound'])
    >>> is_valid({'memory': [{'junk': 'foo'}]})
    (False, ['region 0 is missing its start bound', 'region 0 is missing its end bound'])
    >>> is_valid({'memory': [{'start': 'foo'}]})
    (False, ['region start "foo" is not an integer', 'region 0 is missing its end bound'])
    >>> is_valid({'memory': [{'start': 'foo', 'end': 'bar'}]})
    (False, ['region start "foo" is not an integer', 'region end "bar" is not an integer'])
    >>> is_valid({'memory': [{'start': True, 'end': 4096}]})
    (False, ['region start "True" is not an integer'])
    >>> is_valid({'memory': [{'start': 2048, 'end': 1024}]})
    (False, ['region bounds are not in strictly increasing order (1024 not > 2048)'])
    >>> is_valid({'memory': [{'end': 4095, 'start': 0}, {'end': 65535, 'start': 32768}, {'end': 1342177280, 'start': 268435456}]})
    (True, [])
    >>> is_valid({'memory': [{'end': 4095, 'start': 0}, {'end': 65535, 'start': 32768}, {'end': 1342177280, 'start': 268435456}, {'end': 16384, 'start': 32768}]})
    (False, ['region bounds are not in strictly increasing order (32768 not > 1342177280)', 'region bounds are not in strictly increasing order (16384 not > 1342177280)'])
    """
    problems = []  # type: List[str]

    if data is None:
        problems.append('no data in file')
    elif not isinstance(data, dict) or 'memory' not in data:
        # A YAML document whose top level is a scalar or a sequence cannot
        # have a "memory" key either, so it gets the same diagnostic instead
        # of raising TypeError.
        problems.append('no description of memory in file (no "memory" key)')
    elif not isinstance(data['memory'], list):
        problems.append('bad description of memory in file'
                        ' ("memory" is not a list)')
    elif not data['memory']:
        problems.append('memory described as empty in file'
                        ' (list is zero-length)')
    else:
        # The initialization of last_seen_bound works with the "increasing
        # bounds" comparison below to require that all addresses be
        # nonnegative.
        last_seen_bound = -1

        for region_counter, region in enumerate(data['memory']):
            if not isinstance(region, dict):
                # Without this check, looking up the bounds below would raise
                # TypeError for scalar or list entries.
                problems.append('region {n} is not a mapping of bounds'
                                .format(n=region_counter))
                continue

            for bound in ('start', 'end'):
                if bound not in region:
                    problems.append('region {n} is missing its {name} bound'
                                    .format(n=region_counter, name=bound))
                    continue

                value = region[bound]

                # `bool` is a subclass of `int`, but `true`/`false` is not a
                # meaningful address, so reject it explicitly.
                if isinstance(value, bool) or not isinstance(value, int):
                    problems.append('region {name} "{value}" is not an integer'
                                    .format(name=bound, value=value))
                elif not value > last_seen_bound:
                    problems.append('region bounds are not in strictly'
                                    ' increasing order ({this} not > {last})'
                                    .format(this=value, last=last_seen_bound))
                else:
                    last_seen_bound = value

    return (not problems, problems)


def _generated_marker(tag: str) -> str:
    """
    Return the C comment marking one edge (`tag` is "BEGIN" or "END") of a
    generated section, naming the generating program.
    """
    # Both values are substituted in a single step so that braces in
    # `program_name` cannot be misread as format fields.
    return '/* generated by {} {} */'.format(program_name, tag)


def report(data=None, c_symbols: Optional[Dict[str, str]] = None,
           use_c=False) -> str:
    """
    Return a (typically multi-line) string with information about memory
    regions described in `data`.  The string is empty if `is_valid()` rejects
    the data.

    The default string contents are human-readable; if `use_c` is `True`, C
    syntax is emitted instead.  The `c_symbols` dict describes the C symbol
    names to be emitted (keys `array_symbol`, `array_length_symbol` and
    `structure_tag_symbol`); any key that is absent falls back to the default
    used by the command-line interface.
    """
    # is_valid() returns a (bool, problems) tuple, which is always truthy;
    # only its first element says whether the data is usable.
    (is_good_data, _) = is_valid(data)

    if not is_good_data:
        return ''

    regions = data['memory']
    n = len(regions)

    if use_c:
        symbols = dict(_DEFAULT_C_SYMBOLS)
        symbols.update(c_symbols or {})

        # We want to mark generated code with a comment.  For best
        # comprehension (by the reader of the generated code), we want to
        # clearly indicate (1) what generated the code and (2) where the
        # generated section begins and ends.  The two comments differ only in
        # their tag to facilitate any desired post-processing.
        head = '''{comment_begin}
int {length} = {n};

struct {tag} {{
    size_t start;
    size_t end;
}} {array}[{n}] = {{
'''.format(comment_begin=_generated_marker('BEGIN'),
           length=symbols['array_length_symbol'],
           tag=symbols['structure_tag_symbol'],
           array=symbols['array_symbol'],
           n=n)
        body = '\n'.join(
            '\t{{ .start = {start}, .end = {end} }},'.format(
                start=region['start'], end=region['end'])
            for region in regions)
        tail = '\n}};\n{}'.format(_generated_marker('END'))

        return '{head}{body}{tail}'.format(head=head, body=body, tail=tail)

    head = 'number of memory regions: {}\n'.format(n)
    body = '\n'.join(
        'region {r}:\n\tstart: {start}\n\tend: {end}'.format(
            r=r, start=region['start'], end=region['end'])
        for (r, region) in enumerate(regions))

    return '{head}{body}'.format(head=head, body=body)


def load_data(yaml_filename: str) -> Optional[Dict[str, Any]]:
    """
    Call `yaml.safe_load()` (from `pyyaml`) on the contents of `yaml_filename`
    and return what was found there: normally a Dict, or `None` if the file
    holds no YAML document.

    Raises `OSError` if the file cannot be opened and `yaml.YAMLError` if its
    contents are not well-formed YAML.
    """
    with open(yaml_filename, 'r') as f:
        return yaml.safe_load(f)


def _process_operand(yaml_filename: str, c_symbols: Dict[str, str],
                     use_c: bool) -> bool:
    """
    Handle one non-optional command-line argument; called by `main()`.

    Prints the report for `yaml_filename` to standard output and returns
    `True`, or writes one or more diagnostics to standard error and returns
    `False` if the file cannot be read, cannot be parsed, or does not contain
    a valid description of memory.
    """
    # Set up a prefix for diagnostic messages.  Diagnostics should always
    # identify who is talking (`program_name`) and if operating on a file,
    # should name the file in which trouble is encountered.  Both of these
    # make grep more effective.
    prefix = "{pn}: file \"{fn}\":".format(pn=program_name, fn=yaml_filename)

    try:
        data = load_data(yaml_filename)
    except (OSError, UnicodeDecodeError) as err:
        sys.stderr.write("{} cannot be read: {}\n".format(prefix, err))
        return False
    except yaml.YAMLError as err:
        sys.stderr.write("{} cannot be parsed: {}\n".format(prefix, err))
        return False

    (is_good_data, problems) = is_valid(data)

    if is_good_data:
        print(report(data, c_symbols, use_c=use_c))
        return True

    if len(problems) == 1:
        sys.stderr.write("{} {}\n".format(prefix, problems[0]))
    else:
        sys.stderr.write("{} has multiple problems:\n".format(prefix))

        for problem in problems:
            sys.stderr.write('{}\t{}\n'.format(prefix, problem))

    return False


def main() -> int:
    """
    Executable entry point.

    Parses the command line, processes every named file (continuing past
    files that have problems), and returns the process exit status: 0 if all
    files were reported on successfully, 1 otherwise.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""
Extract information of interest to the seL4 image build process from one or more
files generated by `platform_gen.yaml`.

If a YAML file lacks a description of memory, or fails to parse, a diagnostic is
emitted and an exit status of 1 returned.  Exit status 2 indicates a problem
while attempting to parse arguments.

Note that when `--emit-c-syntax` is specified, C99 designated initialisers are
used in the generated code.  This code can be used directly (e.g., inside a
function body) or in a header file.

An example of usage follows.  Note the symbol names used, including those of the
structure members.  An array of structures is always used, even if there is only
one region and therefore array element.  The length of the array is explicitly
exposed, rather than using values like "NULL, NULL" to mark the end of the list.

```
#include "output_of_this_tool.h"

int main(int argc, char *argv[]) {
    for (int i = 0; i < num_memory_regions; i++) {
        (void) printf("memory region %d: 0x%08lx - 0x%08lx\\n",
                      i, memory_region[i].start, memory_region[i].end);
    }
}
```
""")
    parser.add_argument('platform_filename', nargs='+', type=str,
                        help='YAML description of platform parameters')
    parser.add_argument('--emit-c-syntax', action='store_true',
                        help='emit C syntax instead of human-readable output')
    parser.add_argument('--array_symbol', type=str,
                        default=_DEFAULT_C_SYMBOLS['array_symbol'],
                        help='desired C identifier for struct array')
    parser.add_argument('--array_length_symbol', type=str,
                        default=_DEFAULT_C_SYMBOLS['array_length_symbol'],
                        help='desired C identifier for length of struct array')
    parser.add_argument('--structure_tag_symbol', type=str,
                        default=_DEFAULT_C_SYMBOLS['structure_tag_symbol'],
                        help='desired C identifier for structure tag')
    args = parser.parse_args()
    there_was_any_trouble = False

    c_symbols = {
        'array_symbol': args.array_symbol,
        'array_length_symbol': args.array_length_symbol,
        'structure_tag_symbol': args.structure_tag_symbol,
    }

    # Every operand is processed even after a failure, so that one run
    # reports the problems of all files.
    for yaml_filename in args.platform_filename:
        if not _process_operand(yaml_filename, c_symbols,
                                use_c=args.emit_c_syntax):
            there_was_any_trouble = True

    return 1 if there_was_any_trouble else 0


if __name__ == '__main__':
    sys.exit(main())