#!/usr/bin/env python
#
# Copyright 2017, Data61
# Commonwealth Scientific and Industrial Research Organisation (CSIRO)
# ABN 41 687 119 230.
#
# This software may be distributed and modified according to the terms of
# the BSD 2-Clause license. Note that NO WARRANTY is provided.
# See "LICENSE_BSD2.txt" for details.
#
# @TAG(DATA61_BSD)
#

"""
Script for generating latex or markdown from doxygen-generated xml files.
The generated latex files are compatible with the seL4 manual.
"""
import argparse
import sys
import os
import re
from bs4 import BeautifulSoup
import six


class Generator(object):
    """
    Format-independent base class for converting doxygen xml into text.

    Subclasses override ESCAPE_PATTERNS and the formatting hooks
    (ref_format, parse_list, generate_param_string, generate_api_doc, ...);
    the hooks defined here all produce empty output.
    """

    # Dict mapping characters to their escape sequence in the output format
    ESCAPE_PATTERNS = {}

    def get_parse_table(self):
        """
        Return a dict mapping xml element names to handlers.

        Each handler is called as handler(node, ref_dict) and returns the
        string that the element translates to.
        """
        # table of translations of xml children of 'para' elements
        parse_table = {
            'para': self.parse_recurse,
            'computeroutput': lambda p, r: '%s' % self.get_text(p),
            'texttt': lambda p, r: '%s' % self.get_text(p['text']),
            'ref': self.ref_to_format,
            'nameref': self.nref_to_format,
            'shortref': lambda p, r: "%s" % p['sec'],
            'obj': lambda p, r: "%s" % p['name'],
            'errorenumdesc': lambda p, r: "",
            'orderedlist': self.parse_ordered_list,
            'listitem': lambda p, r: self.parse_para(p.para, r),
            'itemizedlist': self.parse_itemized_list,
            'autoref': lambda p, r: "%s" % p['label'],
            'docref': self.parse_recurse,
        }
        return parse_table

    def default_return_doc(self, ret_type):
        """
        Returns the doc for the return value of a function
        implied by its return type
        """

        return ""

    def text_escape(self, string):
        """
        Return a string with every key of ESCAPE_PATTERNS replaced by its
        escape sequence. The string is returned unchanged when the class
        defines no patterns.
        """
        patterns = self.ESCAPE_PATTERNS
        if not patterns:
            # An empty alternation would match the empty string everywhere
            # and fail the lookup below.
            return string

        # Escape each key on its own and only then join with '|'; escaping
        # the joined string would turn the '|' separators into literals.
        # Longer keys come first so they win over any shorter prefix.
        keys = sorted(patterns, key=len, reverse=True)
        escape_regex = re.compile('|'.join(re.escape(key) for key in keys))
        return escape_regex.sub(lambda m: patterns[m.group()], string)

    def get_text(self, soup, escape=True):
        """
        Return a string containing a concatenation of a nodes text node
        children, recursing into non-text nodes or escaping special
        characters if necessary.

        soup may be a plain string or a parsed xml node; escape selects
        whether text_escape is applied to the result.
        """

        if isinstance(soup, str):
            string = soup
        elif isinstance(soup, six.string_types):
            string = str(soup)
        elif soup.string:
            string = str(soup.string)
        else:
            string = soup.get_text()

        if string is None:
            return None
        return self.text_escape(string) if escape else string

    def ref_format(self, refid, ref_dict):
        """Lookup refid in ref_dict and output the api function reference"""
        return ""

    def ref_to_format(self, para, ref_dict):
        """Convert a reference by id to a formatted reference by looking up refid in para"""
        if len(ref_dict) > 0:
            return self.ref_format(para["refid"], ref_dict)
        return ""

    def nref_to_format(self, para, ref_dict):
        """Convert a reference by name to a formatted reference by looking up name in para"""
        if len(ref_dict) > 0:
            return self.ref_format(para["name"], ref_dict)
        return ""

    def parse_list(self, para, ref_dict, tag):
        """Format a list element; tag is 'enumerate' or 'itemize'"""
        return ""

    def parse_ordered_list(self, para, ref_dict):
        """orderedlist --> enumerate"""
        return self.parse_list(para, ref_dict, 'enumerate')

    def parse_itemized_list(self, para, ref_dict):
        """itemizedlist --> itemize"""
        return self.parse_list(para, ref_dict, 'itemize')

    def parse_recurse(self, para, ref_dict):
        """Recursively parse a para element"""
        # recurse on the contents
        return "".join(self.parse_para(item, ref_dict) for item in para.contents)

    def parse_para(self, para_node, ref_dict=None):
        """
        Parse a paragraph node, handling special doxygen node types
        that may appear inside a paragraph. Unhandled cases are
        not parsed and result in an empty string.

        ref_dict is the mapping produced by build_ref_dict; when omitted,
        an empty mapping is used and references produce no output.
        """
        if ref_dict is None:
            ref_dict = {}

        # nodes without a name are plain text
        if para_node.name is None:
            return self.get_text(para_node, escape=True)

        handler = self.get_parse_table().get(para_node.name)
        if handler is None:
            return ""
        return handler(para_node, ref_dict)

    def parse_brief(self, parent):
        """
        Parse the "brief description" section of a doxygen member.
        """
        para_nodes = parent.find('briefdescription').find_all('para')
        return "\n\n".join([self.parse_para(n) for n in para_nodes])

    def parse_detailed_desc(self, parent, ref_dict):
        """
        Parse the "detailed description" section of a doxygen member.

        Returns a tuple (details, params, ret) of formatted strings, where
        empty details and ret are replaced by the format's todo marker.
        """
        # parse the function parameters
        params = {}
        param_order = []
        types_iter = iter(parent.find_all('type'))
        names = parent.find_all('declname')

        # the first type is the return type
        ret_type = six.next(types_iter)

        # the rest are parameters
        for n in names:
            param_type = six.next(types_iter).text
            if param_type == "void":
                continue
            params[str(n.text)] = {"type": param_type}
            param_order.append(str(n.text))

        # attach the documented description to each declared parameter
        for param_item in parent.find_all("parameteritem"):
            param_name_node = param_item.find("parametername")
            param_desc_node = param_item.find("parameterdescription")

            param_name = self.get_text(param_name_node, escape=False)
            param_desc = self.parse_para(param_desc_node.find('para'), ref_dict)

            params[param_name]["desc"] = param_desc

        if not params:
            params_str = self.generate_empty_param_string()
        else:
            params_str = "".join(
                self.generate_param_string(params[param_name], param_name)
                for param_name in param_order
            )

        # top-level paragraphs that are not the parameter list form the details
        details = ""
        for n in parent.detaileddescription.find_all('para', recursive=False):
            if not n.parameterlist:
                details += self.parse_para(n, ref_dict)
                details += "\n\n"

        # the last word of the return type decides the default return doc
        ret_words = self.get_text(ret_type, escape=False).split()
        ret = self.default_return_doc(ret_words[-1] if ret_words else "")
        for n in parent.find_all("simplesect"):
            if n['kind'] == "return":
                ret = self.parse_para(n.find('para'), ref_dict)
                break

        return (self.todo_if_empty(details.strip()), params_str, self.todo_if_empty(ret.strip()))

    def parse_prototype(self, parent, escape=True):
        """
        Extract a function prototype from a doxygen member.
        """

        inline = parent["inline"] == "yes"
        static = parent["static"] == "yes"
        ret_type = self.get_text(parent.find("type"), escape)
        name = self.get_text(parent.find("name"), escape)

        output = "%s %s" % (ret_type, name)
        if inline:
            output = "inline " + output
        if static:
            output = "static " + output

        return output

    def build_ref_dict(self, soup):
        """
        Return a dict mapping reference ids and reference names
        to details about the referee.
        """

        ret = {}
        for member in soup.find_all("memberdef"):
            name = str(member.find('name').string)
            label = member.manual['label']
            heading = member.manual['name']
            ref_id = member['id']
            data = {
                "name": self.text_escape(name),
                "original_name": name,
                "label": label,
                "ref": ref_id,
                "heading": heading,
            }

            # the same record is reachable by id and by name
            ret[ref_id] = data
            ret[name] = data

        return ret

    def generate_param_string(self, param_info, param_name):
        """Format a single parameter"""
        return ""

    def generate_empty_param_string(self):
        """Format the parameter section of a function without parameters"""
        return ""

    def generate_api_doc(self, level, member, params, ret, details):
        """Format the complete documentation entry of one member"""
        return ""

    def todo_if_empty(self, s):
        """
        Returns its argument if its argument is non-none and non-empty,
        otherwise returns "TODO"
        """
        return s if s else "TODO"


class LatexGenerator(Generator):
    """
    A class that represents the generator for Doxygen to Latex. A child of the Generator class.
    """

    # Dict mapping characters to their escape sequence in latex
    ESCAPE_PATTERNS = {
        "_": "\\_",
    }

    def get_parse_table(self):
        """Return the base parse table with the latex-specific handlers installed"""
        parse_table = super(LatexGenerator, self).get_parse_table()
        parse_table['computeroutput'] = lambda p, r: '\\texttt{%s}' % self.get_text(p)
        parse_table['texttt'] = lambda p, r: '\\texttt{%s}' % self.get_text(p['text'])
        parse_table['shortref'] = lambda p, r: "\\ref{sec:%s}" % p['sec']
        parse_table['obj'] = lambda p, r: "\\obj{%s}" % p['name']
        parse_table['errorenumdesc'] = lambda p, r: "\\errorenumdesc"
        parse_table['listitem'] = lambda p, r: "\\item " + self.parse_para(p.para, r) + "\n"
        parse_table['autoref'] = lambda p, r: "\\autoref{%s}" % p['label']
        return parse_table

    def default_return_doc(self, ret_type):
        """
        Returns the latex doc for the return value of a function
        implied by its return type
        """

        if ret_type == "void":
            return "\\noret"
        return ""

    def ref_format(self, refid, ref_dict):
        """Lookup refid in ref_dict and output the formatted latex reference"""

        ref = ref_dict[refid]
        return "\\apifunc{%(name)s}{%(label)s}" % ref

    def parse_list(self, para, ref_dict, tag):
        """Parse a list element into a latex environment named by tag"""

        output = '\\begin{%s}\n' % tag
        for item in para.contents:
            output += self.parse_para(item, ref_dict)
        output += '\\end{%s}\n' % tag
        return output

    def todo_if_empty(self, s):
        """Return s if it is non-empty, otherwise the latex todo marker"""
        return s if s else "\\todo"

    def generate_param_string(self, param_info, param_name):
        """Format a single parameter as a \\param entry"""
        return "\\param{%(type)s}{%(name)s}{%(desc)s}\n" % {
            "type": self.get_text(param_info["type"]),
            "name": self.get_text(param_name),
            "desc": self.todo_if_empty(param_info.get("desc", "").strip()),
        }

    def generate_empty_param_string(self):
        """Format the parameter section of a function without parameters"""
        return "\\param{void}{}{}"

    def generate_api_doc(self, level, member, params, ret, details):
        """Format the complete \\apidoc entry of one member"""
        manual_node = member.manual
        return """
\\apidoc
[{%(level)s}]
{%(label)s}
{%(name)s}
{%(brief)s}
{%(prototype)s}
{%(params)s}
{%(ret)s}
{%(details)s}
        """ % {
            "level": self.level_to_header(level),
            "label": manual_node["label"],
            "name": self.text_escape(manual_node["name"]),
            "brief": self.todo_if_empty(self.parse_brief(member)),
            "prototype": self.parse_prototype(member),
            "params": params,
            "ret": ret,
            "details": details,
        }

    def level_to_header(self, level):
        """Return the latex sectioning command name for a nesting level"""
        if level == 0:
            return 'chapter'
        elif level == 1:
            return 'section'
        elif level == 2:
            return 'subsection'
        elif level == 3:
            return 'subsubsection'
        else:
            return 'paragraph'

    def level_to_heading(self, level, name):
        """Return a latex heading for name at the given nesting level"""
        return '\\' + self.level_to_header(level) + '{' + self.text_escape(name) + '}'

    def gen_label(self, label):
        """Return a latex label command"""
        return '\\label{' + label + '}\n'


class MarkdownGenerator(Generator):
    """
    A class that represents the generator for Doxygen to Markdown. A child of the Generator class
    """

    # Dict mapping characters to their escape sequence in markdown
    ESCAPE_PATTERNS = {
        "`": "\\`",
        "#": "\\#",
        "_": "\\_",
        "*": "\\*",
        "[": "\\[",
        "]": "\\]",
        "-": "\\-",
        "+": "\\+",
        "!": "\\!",
    }

    def get_parse_table(self):
        """Return the base parse table with the markdown-specific handlers installed"""
        parse_table = super(MarkdownGenerator, self).get_parse_table()
        parse_table['computeroutput'] = lambda p, r: '`%s`' % self.get_text(p, escape=False)
        parse_table['texttt'] = lambda p, r: '`%s`' % self.get_text(p['text'], escape=False)
        parse_table['obj'] = lambda p, r: '**%s**' % p['name']
        parse_table['errorenumdesc'] = lambda p, r: '%s' % self.get_error_num_description()
        parse_table['listitem'] = lambda p, r: self.parse_para(p.para, r) + "\n\n"
        parse_table['autoref'] = lambda p, r: "autoref[%s]" % p['label']
        parse_table['docref'] = lambda p, r: "DOCREF"
        return parse_table

    def default_return_doc(self, ret_type):
        """
        Returns the description for the return value of a function
        implied by its return type
        """

        if ret_type == "void":
            return "This method does not return anything."
        return ""

    def ref_format(self, refid, ref_dict):
        """
        Lookup refid in ref_dict and output the formatted Markdown reference
        Creates a Markdown link
        """

        ref = ref_dict[refid]
        ref_anchor = (ref['heading'].lower()).replace(" ", "-")
        return "[`%s`](#%s)" % (ref['original_name'], ref_anchor)

    def get_error_num_description(self):
        """Return the generic description of an error-code return value"""
        return "A return value of `0` indicates success. A non-zero value indicates that an error occurred."

    def generate_itemize_list(self, para, ref_dict, output):
        """ Returns output followed by a Markdown item list """

        for item in para.contents:
            parsed_item = self.parse_para(item, ref_dict)
            # children that parse to nothing but whitespace are skipped
            if parsed_item.rstrip():
                output += "* %s" % parsed_item
        return output

    def generate_enumerate_list(self, para, ref_dict, output):
        """ Returns output followed by a Markdown number list """

        # The number is the position of the node among all children of
        # para, including the children that are skipped below.
        for num, item in enumerate(para.contents):
            parsed_item = self.parse_para(item, ref_dict)
            if parsed_item.rstrip():
                output += "%d. %s" % (num, parsed_item)
        return output

    def parse_list(self, para, ref_dict, tag):
        """
        Parse a list element; tag selects 'enumerate' or 'itemize'.
        Any other tag results in an empty string.
        """

        if tag == "enumerate":
            list_generator = self.generate_enumerate_list
        elif tag == "itemize":
            list_generator = self.generate_itemize_list
        else:
            return ""

        # The generator already returns its prefix, so the list ends up
        # preceded by two newlines.
        return '\n' + list_generator(para, ref_dict, '\n')

    def todo_if_empty(self, s):
        """Return s if it is non-empty, otherwise the markdown todo marker"""
        return s if s else "*TODO*"

    def generate_params(self, param_string):
        """
        Returns the params in a formatted Markdown table
        """

        if param_string:
            return """
Type | Name | Description
--- | --- | ---
%s
            """ % param_string
        return ""

    def generate_param_string(self, param_info, param_name):
        """Format a single parameter as a row of the parameter table"""
        return "`%(type)s` | `%(name)s` | %(desc)s\n" % {
            "type": self.get_text(param_info["type"], escape=False),
            "name": self.get_text(param_name, escape=False),
            "desc": self.todo_if_empty(param_info.get("desc", "").strip()),
        }

    def generate_api_doc(self, level, member, params, ret, details):
        """Format the complete Markdown entry of one member"""
        manual_node = member.manual

        # Descriptions that just contain a document reference are removed.
        # Found by the 'DOCREF' symbol
        match_details = re.match(r'^DOCREF$', details, re.M | re.I)
        if match_details:
            details_string = ""
        else:
            # single newlines become spaces; the lookahead leaves the last
            # newline of a run in place
            details_string = "**Description:** " + re.sub(r"\n(?!\n)", " ", details)

        ret_string = "**Return value:** " + re.sub("\n(?!\n)", " ", ret)

        # Removed any DOCREF symbols from the return, details and param strings
        ret_string = re.sub(r'DOCREF', "", ret_string)
        details_string = re.sub(r'DOCREF', "", details_string)
        params_string = re.sub(r'DOCREF', "", params)

        return """
%(hash)s %(name)s
`%(prototype)s`

%(brief)s
%(params)s
%(ret)s

%(details)s
""" % {
            "hash": self.level_to_header(level),
            "name": self.text_escape(manual_node["name"]),
            "brief": self.todo_if_empty(self.parse_brief(member)),
            "prototype": self.parse_prototype(member, escape=False),
            "params": self.generate_params(params_string),
            "ret": ret_string,
            "details": details_string,
        }

    def level_to_header(self, level):
        """Return the run of '#' characters for a nesting level"""
        return (level + 1) * '#'

    def level_to_heading(self, level, name):
        """Return a Markdown heading for name at the given nesting level"""
        return self.level_to_header(level) + ' ' + self.text_escape(name) + '\n'

    def gen_label(self, label):
        """Markdown output carries no labels"""
        return ''


def generate_general_syscall_doc(generator, input_file_name, level):
    """
    Takes a path to a file containing doxygen-generated xml,
    and return a string containing text in the generator's format
    suitable for inclusion in the sel4 manual.

    Nested groups are read from '<refid>.xml' files next to the input
    file and are emitted one level deeper.
    """

    dir_name = os.path.dirname(input_file_name)
    # The document is parsed up front so the file is closed again before
    # recursing into nested groups.
    with open(input_file_name, "r") as f:
        soup = BeautifulSoup(f, "lxml")

    output = ""
    ref_dict = generator.build_ref_dict(soup)
    elements = soup.find_all("memberdef")
    summary = soup.find('compounddef')

    # parse any top level descriptions
    if summary is not None:
        for ddesc in summary.find_all('detaileddescription', recursive=False):
            if ddesc.para:
                output += generator.parse_para(ddesc.para)

    # parse any nested groups
    for inner_group in soup.find_all("innergroup"):
        new_input_file_name = inner_group["refid"] + '.xml'
        new_input_file = os.path.join(dir_name, new_input_file_name)
        output += generator.level_to_heading(level, inner_group.text)
        output += generator.gen_label(inner_group["refid"])
        output += generate_general_syscall_doc(generator, new_input_file, level + 1)

    # parse all of the function definitions
    if not elements and output == "":
        return "No methods."

    for member in elements:
        details, params, ret = generator.parse_detailed_desc(member, ref_dict)
        output += generator.generate_api_doc(level, member, params, ret, details)
    return output


def process_args():
    """Build and return the argument parser of the script"""
    parser = argparse.ArgumentParser()

    parser.add_argument("-f", "--format", choices=["latex", "markdown"],
                        default="latex", help="Format of doxygen output")

    # Both paths are used unconditionally by main().
    parser.add_argument("-i", "--input", dest="input", type=str, required=True,
                        help="File containing doxygen-generated xml.")
    parser.add_argument("-o", "--output", dest="output", type=str, required=True,
                        help="Output latex file.")

    parser.add_argument("-l", "--level", type=int,
                        help="Level for each method, 0 = top level")

    return parser


def main():
    """Convert doxygen xml into a seL4 API LaTeX or Markdown manual format"""
    args = process_args().parse_args()

    # A bare file name has no directory part; there is nothing to create then.
    output_dir = os.path.dirname(args.output)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if args.format == "latex":
        generator = LatexGenerator()
    elif args.format == "markdown":
        generator = MarkdownGenerator()

    output_str = generate_general_syscall_doc(generator, args.input, args.level)

    with open(args.output, "w") as output_file:
        output_file.write(output_str)


if __name__ == "__main__":
    sys.exit(main())