#!/usr/bin/env python3
#
# Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
#
# SPDX-License-Identifier: BSD-2-Clause
#

"""
Script for generating latex or markdown from doxygen-generated xml files.
The generated latex files are compatible with the seL4 manual.
"""
import argparse
import sys
import os
import re
from bs4 import BeautifulSoup
import six


class Generator(object):
    """
    Format-independent base class for the documentation generators.

    It walks the parsed doxygen XML and delegates every piece of
    format-specific output to hook methods, which return empty or plain
    strings here and are overridden by the concrete generators.
    """

    # Dict mapping characters to their escape sequence in the output format
    ESCAPE_PATTERNS = {}

    def get_parse_table(self):
        """
        Return a dict mapping the tag names that may appear below a 'para'
        element to a callable taking (node, ref_dict) and returning a string.
        """
        # table of translations of xml children of 'para' elements
        parse_table = {
            'para': self.parse_recurse,
            'computeroutput': lambda p, r: '%s' % self.get_text(p),
            'texttt': lambda p, r: '%s' % self.get_text(p['text']),
            'ref': self.ref_to_format,
            'nameref': self.nref_to_format,
            'shortref': lambda p, r: "%s" % p['sec'],
            'obj': lambda p, r: "%s" % p['name'],
            'errorenumdesc': lambda p, r: "",
            'orderedlist': self.parse_ordered_list,
            'listitem': lambda p, r: self.parse_para(p.para, r),
            'itemizedlist': self.parse_itemized_list,
            'autoref': lambda p, r: "%s" % p['label'],
            'docref': self.parse_recurse
        }
        return parse_table

    def default_return_doc(self, ret_type):
        """
        Returns the doc for the return value of a function
        implied by its return type
        """

        return ""

    def text_escape(self, string):
        """
        Return string with every key of ESCAPE_PATTERNS replaced by its
        escape sequence. The string is returned unchanged when the
        generator defines no escape patterns.
        """
        patterns = self.ESCAPE_PATTERNS
        if not patterns:
            # An empty alternation would match the empty string at every
            # position and then fail the lookup below.
            return string

        # Each key is escaped on its own; escaping the joined string would
        # also escape the '|' separators and turn the alternation into one
        # literal. Longer keys go first so they win over their prefixes.
        keys = sorted(patterns, key=len, reverse=True)
        escape_regex = re.compile('|'.join(re.escape(key) for key in keys))
        return escape_regex.sub(lambda match: patterns[match.group()], string)

    def get_text(self, soup, escape=True):
        """
        Return a string containing a concatenation of a nodes text node
        children, recursing into non-text nodes or escaping special
        characters if necessary.

        soup may be a plain string or a parsed node; escape selects whether
        the result is passed through text_escape.
        """

        if isinstance(soup, str):
            string = soup
        elif isinstance(soup, six.string_types):
            string = str(soup)
        elif soup.string:
            string = str(soup.string)
        else:
            string = soup.get_text()

        return self.text_escape(string) if escape else string

    def ref_format(self, refid, ref_dict):
        """Lookup refid in ref_dict and output the api function reference"""
        return ""

    def ref_to_format(self, para, ref_dict):
        """Convert a reference by id to a formatted reference by looking up refid in para"""
        if ref_dict:
            return self.ref_format(para["refid"], ref_dict)
        return ""

    def nref_to_format(self, para, ref_dict):
        """Convert a reference by name to a formatted reference by looking up name in para"""
        if ref_dict:
            return self.ref_format(para["name"], ref_dict)
        return ""

    def parse_list(self, para, ref_dict, tag):
        """Format a list node; tag is 'enumerate' or 'itemize'"""
        return ""

    def parse_ordered_list(self, para, ref_dict):
        """orderedlist --> enumerate"""
        return self.parse_list(para, ref_dict, 'enumerate')

    def parse_itemized_list(self, para, ref_dict):
        """itemizedlist --> itemize"""
        return self.parse_list(para, ref_dict, 'itemize')

    def parse_recurse(self, para, ref_dict):
        """Recursively parse a para element"""
        # recurse on the contents
        return "".join(self.parse_para(item, ref_dict) for item in para.contents)

    def parse_para(self, para_node, ref_dict=None):
        """
        Parse a paragraph node, handling special doxygen node types
        that may appear inside a paragraph. Unhandled cases are
        not parsed and result in an empty string.

        ref_dict is the table produced by build_ref_dict; when it is
        omitted or empty, references are rendered as empty strings.
        """
        if ref_dict is None:
            ref_dict = {}

        # text nodes have no tag name
        if para_node.name is None:
            return self.get_text(para_node, escape=True)

        handler = self.get_parse_table().get(para_node.name)
        if handler is None:
            return ""
        return handler(para_node, ref_dict)

    def parse_brief(self, parent):
        """
        Parse the "brief description" section of a doxygen member.
        """
        para_nodes = parent.find('briefdescription').find_all('para')
        return "\n\n".join([self.parse_para(n) for n in para_nodes])

    def parse_detailed_desc(self, parent, ref_dict):
        """
        Parse the "detailed description" section of a doxygen member.

        Returns a tuple (details, params, ret) of formatted strings, where
        empty details and ret are replaced by the generator's TODO marker.
        """
        # parse the function parameters
        params = {}
        param_order = []
        types_iter = iter(parent.find_all('type'))
        names = parent.find_all('declname')

        # the first type is the return type
        ret_type = next(types_iter)

        # the rest are parameters
        for n in names:
            param_type = next(types_iter).text
            if param_type == "void":
                continue
            params[str(n.text)] = {"type": param_type}
            param_order.append(str(n.text))

        # attach the documented description to each declared parameter
        for param_item in parent.find_all("parameteritem"):
            param_name_node = param_item.find("parametername")
            param_desc_node = param_item.find("parameterdescription")

            param_name = self.get_text(param_name_node, escape=False)
            param_desc = self.parse_para(param_desc_node.find('para'), ref_dict)

            params[param_name]["desc"] = param_desc

        if not params:
            params_str = self.generate_empty_param_string()
        else:
            # emit parameters in declaration order
            params_str = "".join(
                self.generate_param_string(params[param_name], param_name)
                for param_name in param_order
            )

        # paragraphs holding the parameter list are already handled above
        details = ""
        for n in parent.detaileddescription.find_all('para', recursive=False):
            if not n.parameterlist:
                details += self.parse_para(n, ref_dict)
                details += "\n\n"

        # the default return doc depends on the last word of the return type
        ret_str = self.get_text(ret_type, escape=False)
        ret = self.default_return_doc(ret_str.split()[-1])
        for n in parent.find_all("simplesect"):
            if n['kind'] == "return":
                # an explicit return description overrides the default
                ret = self.parse_para(n.find('para'), ref_dict)
                break
        return (self.todo_if_empty(details.strip()), params_str, self.todo_if_empty(ret.strip()))

    def parse_prototype(self, parent, escape=True):
        """
        Extract a function prototype from a doxygen member.
        """

        inline = parent["inline"] == "yes"
        static = parent["static"] == "yes"
        ret_type = self.get_text(parent.find("type"), escape)
        name = self.get_text(parent.find("name"), escape)

        output = "%s %s" % (ret_type, name)
        if inline:
            output = "inline " + output
        if static:
            output = "static " + output

        return output

    def build_ref_dict(self, soup):
        """
        Return a dict mapping reference ids and reference names
        to details about the referee.
        """

        ret = {}
        for member in soup.find_all("memberdef"):
            name = str(member.find('name').string)
            label = member.manual['label']
            heading = member.manual['name']
            ref_id = member['id']
            data = {
                "name": self.text_escape(name),
                "original_name": name,
                "label": label,
                "ref": ref_id,
                "heading": heading,
            }

            # the same record is reachable by id and by name
            ret[ref_id] = data
            ret[name] = data

        return ret

    def generate_param_string(self, param_info, param_name):
        """Format a single parameter from its info dict and name"""
        return ""

    def generate_empty_param_string(self):
        """Format the parameter section of a function without parameters"""
        return ""

    def generate_api_doc(self, level, member, params, ret, details):
        """Format the complete documentation entry of one member"""
        return ""

    def todo_if_empty(self, s):
        """
        Returns its argument if its argument is non-none and non-empty,
        otherwise returns "TODO"
        """
        return s if s else "TODO"


class LatexGenerator(Generator):
    """
    A class that represents the generator for Doxygen to Latex. A child of the Generator class.
    """

    # Dict mapping characters to their escape sequence in latex
    ESCAPE_PATTERNS = {
        "_": "\\_",
    }

    def get_parse_table(self):
        """Return the base parse table with the latex-specific handlers installed"""
        parse_table = super(LatexGenerator, self).get_parse_table()
        parse_table['computeroutput'] = lambda p, r: '\\texttt{%s}' % self.get_text(p)
        parse_table['texttt'] = lambda p, r: '\\texttt{%s}' % self.get_text(p['text'])
        parse_table['shortref'] = lambda p, r: "\\ref{sec:%s}" % p['sec']
        parse_table['obj'] = lambda p, r: "\\obj{%s}" % p['name']
        parse_table['errorenumdesc'] = lambda p, r: "\\errorenumdesc"
        parse_table['listitem'] = lambda p, r: "\\item " + self.parse_para(p.para, r) + "\n"
        parse_table['autoref'] = lambda p, r: "\\autoref{%s}" % p['label']
        return parse_table

    def default_return_doc(self, ret_type):
        """
        Returns the latex doc for the return value of a function
        implied by its return type
        """

        if ret_type == "void":
            return "\\noret"
        return ""

    def ref_format(self, refid, ref_dict):
        """Lookup refid in ref_dict and output the formatted latex reference"""

        ref = ref_dict[refid]
        return "\\apifunc{%(name)s}{%(label)s}" % ref

    def parse_list(self, para, ref_dict, tag):
        """Parse a list element into a latex environment named by tag"""

        output = '\\begin{%s}\n' % tag
        for item in para.contents:
            output += self.parse_para(item, ref_dict)
        output += '\\end{%s}\n' % tag
        return output

    def todo_if_empty(self, s):
        """Returns its argument if it is non-empty, otherwise the latex todo macro"""
        return s if s else "\\todo"

    def generate_param_string(self, param_info, param_name):
        """Format a single parameter as a latex param macro"""
        return "\\param{%(type)s}{%(name)s}{%(desc)s}\n" % {
            "type": self.get_text(param_info["type"]),
            "name": self.get_text(param_name),
            "desc": self.todo_if_empty(param_info.get("desc", "").strip()),
        }

    def generate_empty_param_string(self):
        """Format the parameter section of a function without parameters"""
        return "\\param{void}{}{}"

    def generate_api_doc(self, level, member, params, ret, details):
        """Format the complete documentation entry of one member as a latex apidoc macro"""
        manual_node = member.manual
        return """
\\apidoc
[{%(level)s}]
{%(label)s}
{%(name)s}
{%(brief)s}
{%(prototype)s}
{%(params)s}
{%(ret)s}
{%(details)s}
        """ % {
            "level": self.level_to_header(level),
            "label": manual_node["label"],
            "name": self.text_escape(manual_node["name"]),
            "brief": self.todo_if_empty(self.parse_brief(member)),
            "prototype": self.parse_prototype(member),
            "params": params,
            "ret": ret,
            "details": details,
        }

    def level_to_header(self, level):
        """Return the latex sectioning command name for a nesting level"""
        headers = {
            0: 'chapter',
            1: 'section',
            2: 'subsection',
            3: 'subsubsection',
        }
        # every other level falls back to 'paragraph'
        return headers.get(level, 'paragraph')

    def level_to_heading(self, level, name):
        """Return a latex heading with the escaped name at the given level"""
        return '\\' + self.level_to_header(level) + '{' + self.text_escape(name) + '}'

    def gen_label(self, label):
        """Return a latex label command for label"""
        return '\\label{' + label + '}\n'


class MarkdownGenerator(Generator):
    """
    A class that represents the generator for Doxygen to Markdown. A child of the Generator class
    """

    # Dict mapping characters to their escape sequence in markdown
    ESCAPE_PATTERNS = {
        "`": "\\`",
        "#": "\\#",
        "_": "\\_",
        "*": "\\*",
        "[": "\\[",
        "]": "\\]",
        "-": "\\-",
        "+": "\\+",
        "!": "\\!",
    }

    def get_parse_table(self):
        """Return the base parse table with the markdown-specific handlers installed"""
        parse_table = super(MarkdownGenerator, self).get_parse_table()
        parse_table['computeroutput'] = lambda p, r: '`%s`' % self.get_text(p, escape=False)
        parse_table['texttt'] = lambda p, r: '`%s`' % self.get_text(p['text'], escape=False)
        parse_table['obj'] = lambda p, r: '**%s**' % p['name']
        parse_table['errorenumdesc'] = lambda p, r: '%s' % self.get_error_num_description()
        parse_table['listitem'] = lambda p, r: self.parse_para(p.para, r) + "\n\n"
        parse_table['autoref'] = lambda p, r: "autoref[%s]" % p['label']
        parse_table['docref'] = lambda p, r: "DOCREF"
        return parse_table

    def default_return_doc(self, ret_type):
        """
        Returns the description for the return value of a function
        implied by its return type
        """

        if ret_type == "void":
            return "This method does not return anything."
        return ""

    def ref_format(self, refid, ref_dict):
        """
        Lookup refid in ref_dict and output the formatted Markdown reference
        Creates a Markdown link
        """

        ref = ref_dict[refid]
        ref_anchor = (ref['heading'].lower()).replace(" ", "-")
        return "[`%s`](#%s)" % (ref['original_name'], ref_anchor)

    def get_error_num_description(self):
        """Return the standard description of an error return value"""
        return "A return value of `0` indicates success. A non-zero value indicates that an error occurred."

    def generate_itemize_list(self, para, ref_dict, output):
        """ Returns output extended by a Markdown item list """

        for item in para.contents:
            parsed_item = self.parse_para(item, ref_dict)
            # children that render to whitespace only are skipped
            if parsed_item.rstrip():
                output += "* %s" % parsed_item
        return output

    def generate_enumerate_list(self, para, ref_dict, output):
        """ Returns output extended by a Markdown number list """

        num = 0
        for item in para.contents:
            parsed_item = self.parse_para(item, ref_dict)
            # Only emitted items are counted, so the list is numbered
            # 1, 2, 3, ... regardless of skipped whitespace children.
            if parsed_item.rstrip():
                num += 1
                output += "%d. %s" % (num, parsed_item)
        return output

    def parse_list(self, para, ref_dict, tag):
        """
        Parse a list element; tag selects 'enumerate' or 'itemize'.

        Raises ValueError for any other tag.
        """

        if tag == "enumerate":
            list_generator = self.generate_enumerate_list
        elif tag == "itemize":
            list_generator = self.generate_itemize_list
        else:
            raise ValueError("unsupported list tag: %r" % (tag,))

        # The list generators return their output argument followed by the
        # items, so the result starts with two newlines.
        output = '\n'
        output += list_generator(para, ref_dict, output)
        return output

    def todo_if_empty(self, s):
        """Returns its argument if it is non-empty, otherwise an emphasised TODO"""
        return s if s else "*TODO*"

    def generate_params(self, param_string):
        """
        Returns the params in a formatted Markdown table
        """

        if param_string:
            return """
Type | Name | Description
--- | --- | ---
%s
            """ % param_string
        return ""

    def generate_param_string(self, param_info, param_name):
        """Format a single parameter as a row of the Markdown parameter table"""
        return "`%(type)s` | `%(name)s` | %(desc)s\n" % {
            "type": self.get_text(param_info["type"], escape=False),
            "name": self.get_text(param_name, escape=False),
            "desc": self.todo_if_empty(param_info.get("desc", "").strip()),
        }

    def generate_api_doc(self, level, member, params, ret, details):
        """Format the complete documentation entry of one member as Markdown"""
        manual_node = member.manual

        # Descriptions that just contain a document reference are removed.
        # Found by the 'DOCREF' symbol
        match_details = re.match(r'^DOCREF$', details, re.M | re.I)
        if match_details:
            details_string = ""
        else:
            # single newlines become spaces, paragraph breaks are kept
            details_string = "**Description:** " + re.sub(r"\n(?!\n)", " ", details)

        ret_string = "**Return value:** " + re.sub(r"\n(?!\n)", " ", ret)

        # Removed any DOCREF symbols from the return, details and param strings
        ret_string = re.sub(r'DOCREF', "", ret_string)
        details_string = re.sub(r'DOCREF', "", details_string)
        params_string = re.sub(r'DOCREF', "", params)

        return """
%(hash)s %(name)s
`%(prototype)s`

%(brief)s
%(params)s
%(ret)s

%(details)s
""" % {
            "hash": self.level_to_header(level),
            "name": self.text_escape(manual_node["name"]),
            "label": manual_node["label"],
            "brief": self.todo_if_empty(self.parse_brief(member)),
            "prototype": self.parse_prototype(member, escape=False),
            "params": self.generate_params(params_string),
            "ret": ret_string,
            "details": details_string,
        }

    def level_to_header(self, level):
        """Return the Markdown heading prefix, level + 1 hash characters"""
        return (level + 1) * '#'

    def level_to_heading(self, level, name):
        """Return a Markdown heading line with the escaped name at the given level"""
        return self.level_to_header(level) + ' ' + self.text_escape(name) + '\n'

    def gen_label(self, label):
        """Markdown output has no labels; always returns an empty string"""
        return ''


def generate_general_syscall_doc(generator, input_file_name, level, ref_dict):
    """
    Takes a path to a file containing doxygen-generated xml,
    and return a string containing output of the given generator
    suitable for inclusion in the sel4 manual.

    Nested groups are read from '<refid>.xml' files next to the input file
    and rendered recursively one level deeper.
    """

    dir_name = os.path.dirname(input_file_name)
    # The document is parsed completely here, so the file does not need to
    # stay open while nested groups are processed.
    with open(input_file_name, "r") as f:
        soup = BeautifulSoup(f, "lxml")

    output = ""
    elements = soup.find_all("memberdef")
    summary = soup.find('compounddef')
    # parse any top level descriptions
    for ddesc in summary.find_all('detaileddescription', recursive=False):
        if ddesc.para:
            output += generator.parse_para(ddesc.para)

    # parse any nested groups
    for inner_group in soup.find_all("innergroup"):
        new_input_file_name = inner_group["refid"] + '.xml'
        new_input_file = os.path.join(dir_name, new_input_file_name)
        output += generator.level_to_heading(level, inner_group.text)
        output += generator.gen_label(inner_group["refid"])
        output += generate_general_syscall_doc(generator, new_input_file, level + 1, ref_dict)

    # parse all of the function definitions
    if not elements and output == "":
        return "No methods."

    for member in elements:
        details, params, ret = generator.parse_detailed_desc(member, ref_dict)
        output += generator.generate_api_doc(level, member, params, ret, details)
    return output


def process_args():
    """Build and return the argument parser of the script"""
    parser = argparse.ArgumentParser()

    parser.add_argument("-f", "--format", choices=["latex", "markdown"],
                        default="latex", help="Format of doxygen output")

    # Input and output are mandatory; the script cannot do anything
    # useful without them.
    parser.add_argument("-i", "--input", dest="input", type=str, required=True,
                        help="File containing doxygen-generated xml.")
    parser.add_argument("-o", "--output", dest="output", type=str, required=True,
                        help="Output latex file.")

    parser.add_argument("-l", "--level", type=int,
                        help="Level for each method, 0 = top level")

    return parser


def main():
    """Convert doxygen xml into a seL4 API LaTeX or Markdown manual format"""
    args = process_args().parse_args()

    # An output path without a directory part refers to the current
    # directory, which needs no creation.
    output_dir = os.path.dirname(args.output)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if args.format == "latex":
        generator = LatexGenerator()
    elif args.format == "markdown":
        generator = MarkdownGenerator()

    # An input path without a directory part lives in the current directory.
    dir_name = os.path.dirname(args.input) or "."

    # create the refdict from all the group__*SystemCalls.xml files
    ref_dict = {}
    for (root, _dirs, files) in os.walk(dir_name):
        for file_name in files:
            if "SystemCalls" not in file_name:
                continue
            # join onto the directory currently walked, not the top directory
            with open(os.path.join(root, file_name), "r") as source:
                soup = BeautifulSoup(source, "lxml")
                ref_dict.update(generator.build_ref_dict(soup))

    output_str = generate_general_syscall_doc(generator, args.input, args.level, ref_dict)

    with open(args.output, "w") as output_file:
        output_file.write(output_str)


if __name__ == "__main__":
    sys.exit(main())