1#!/usr/bin/env python
2#
3# Copyright 2017, Data61
4# Commonwealth Scientific and Industrial Research Organisation (CSIRO)
5# ABN 41 687 119 230.
6#
7# This software may be distributed and modified according to the terms of
8# the BSD 2-Clause license. Note that NO WARRANTY is provided.
9# See "LICENSE_BSD2.txt" for details.
10#
11# @TAG(DATA61_BSD)
12#
13
14"""
Script for generating latex or markdown from doxygen-generated xml files.
The generated latex files are compatible with the seL4 manual.
17"""
18import argparse
19import sys
20import os
21import re
22from bs4 import BeautifulSoup
23import six
24
class Generator(object):
    """
    Base class for turning doxygen-generated xml into manual text.

    The format-specific hooks (default_return_doc, ref_format, parse_list,
    generate_param_string, generate_empty_param_string, generate_api_doc)
    all return an empty string here; subclasses override them, provide
    ESCAPE_PATTERNS and adjust the parse table.
    """

    # Dict mapping characters to their escape sequence in the output format.
    # Empty here, so the base class passes text through unescaped.
    ESCAPE_PATTERNS = {}

    def get_parse_table(self):
        """
        Return a dict mapping the name of an xml element that may appear
        inside a 'para' element to a handler. Each handler is called as
        handler(node, ref_dict) and returns the translated string.
        """
        # table of translations of xml children of 'para' elements
        parse_table = {
            'para'          : self.parse_recurse,
            'computeroutput': lambda p, r: '%s' % self.get_text(p),
            'texttt'        : lambda p, r: '%s' % self.get_text(p['text']),
            'ref'           : self.ref_to_format,
            'nameref'       : self.nref_to_format,
            'shortref'      : lambda p, r: "%s" % p['sec'],
            'obj'           : lambda p, r: "%s" % p['name'],
            'errorenumdesc' : lambda p, r: "",
            'orderedlist'   : self.parse_ordered_list,
            'listitem'      : lambda p, r: self.parse_para(p.para, r),
            'itemizedlist'  : self.parse_itemized_list,
            'autoref'       : lambda p, r: "%s" % p['label'],
            'docref'        : self.parse_recurse
        }
        return parse_table

    def default_return_doc(self, ret_type):
        """
        Returns the doc for the return value of a function
        implied by its return type. The base class has none.
        """

        return ""

    def text_escape(self, string):
        """
        Return string with every key of ESCAPE_PATTERNS replaced by its
        escape sequence. With an empty ESCAPE_PATTERNS the string is
        returned unchanged.
        """
        if not self.ESCAPE_PATTERNS:
            # An empty alternation would match the empty string everywhere
            # and then fail the dict lookup below.
            return string

        # Escape each key on its own and only then join them with '|', so
        # that '|' keeps its meaning as the regex alternation operator.
        # Longer keys go first so they win over any key that is their prefix.
        keys = sorted(self.ESCAPE_PATTERNS, key=len, reverse=True)
        escape_regex = re.compile('|'.join(re.escape(key) for key in keys))
        return escape_regex.sub(lambda m: self.ESCAPE_PATTERNS[m.group()], string)

    def get_text(self, soup, escape=True):
        """
        Return a string containing a concatenation of a nodes text node
        children, recursing into non-text nodes or escaping special
        characters if necessary.

        soup may be a plain string or a node offering `.string` and
        `.get_text()`. When escape is true the result is passed through
        text_escape.
        """

        if isinstance(soup, str):
            string = soup
        elif isinstance(soup, six.string_types):
            string = str(soup)
        elif soup.string:
            string = str(soup.string)
        else:
            string = soup.get_text()

        if string is None:
            return None
        return self.text_escape(string) if escape else string

    def ref_format(self, refid, ref_dict):
        """Lookup refid in ref_dict and output the api function reference"""
        return ""

    def ref_to_format(self, para, ref_dict):
        """
        Convert a reference by id to the output format by looking up
        para's 'refid' in ref_dict. An empty ref_dict yields "".
        """
        if ref_dict:
            return self.ref_format(para["refid"], ref_dict)
        return ""

    def nref_to_format(self, para, ref_dict):
        """
        Convert a reference by name to the output format by looking up
        para's 'name' in ref_dict. An empty ref_dict yields "".
        """
        if ref_dict:
            return self.ref_format(para["name"], ref_dict)
        return ""

    def parse_list(self, para, ref_dict, tag):
        """Translate a list element; tag is 'enumerate' or 'itemize'."""
        return ""

    def parse_ordered_list(self, para, ref_dict):
        """orderedlist --> enumerate"""
        return self.parse_list(para, ref_dict, 'enumerate')

    def parse_itemized_list(self, para, ref_dict):
        """itemizedlist --> itemize"""
        return self.parse_list(para, ref_dict, 'itemize')

    def parse_recurse(self, para, ref_dict):
        """Recursively parse a para element and concatenate the results"""
        # recurse on the contents
        return "".join([self.parse_para(item, ref_dict) for item in para.contents])

    def parse_para(self, para_node, ref_dict=None):
        """
        Parse a paragraph node, handling special doxygen node types
        that may appear inside a paragraph. Unhandled cases are
        not parsed and result in an empty string.

        ref_dict defaults to an empty dict, in which case references
        are translated to an empty string.
        """
        if ref_dict is None:
            # Fresh dict per call instead of a shared mutable default
            ref_dict = {}

        # A node without a name is a bare text node
        if para_node.name is None:
            return self.get_text(para_node, escape=True)

        handler = self.get_parse_table().get(para_node.name)
        if handler is None:
            return ""
        return handler(para_node, ref_dict)

    def parse_brief(self, parent):
        """
        Parse the "brief description" section of a doxygen member.
        Paragraphs are separated by a blank line.
        """
        para_nodes = parent.find('briefdescription').find_all('para')
        return "\n\n".join([self.parse_para(n) for n in para_nodes])

    def parse_detailed_desc(self, parent, ref_dict):
        """
        Parse the "detailed description" section of a doxygen member.

        Returns a tuple (details, params, ret) of formatted strings, where
        empty details or ret are replaced by the todo marker.

        Raises StopIteration if parent holds fewer 'type' elements than
        one return type plus one per 'declname', and KeyError if a
        documented parameter is not among the declared non-void ones.
        """
        # parse the function parameters
        params = {}
        param_order = []
        types_iter = iter(parent.find_all('type'))

        # the first type is the return type
        ret_type = next(types_iter)

        # the rest are parameters, matched to their names in document order
        for name_node in parent.find_all('declname'):
            param_type = next(types_iter).text
            if param_type == "void":
                continue
            declared_name = str(name_node.text)
            params[declared_name] = {"type": param_type}
            param_order.append(declared_name)

        # attach the documented description to each declared parameter
        for param_item in parent.find_all("parameteritem"):
            param_name_node = param_item.find("parametername")
            param_desc_node = param_item.find("parameterdescription")

            param_name = self.get_text(param_name_node, escape=False)
            param_desc = self.parse_para(param_desc_node.find('para'), ref_dict)

            params[param_name]["desc"] = param_desc

        if not params:
            params_str = self.generate_empty_param_string()
        else:
            params_str = "".join(
                [self.generate_param_string(params[name], name) for name in param_order])

        # top level paragraphs that do not carry the parameter list make up
        # the description
        detail_parts = []
        for n in parent.detaileddescription.find_all('para', recursive=False):
            if not n.parameterlist:
                detail_parts.append(self.parse_para(n, ref_dict))
                detail_parts.append("\n\n")
        details = "".join(detail_parts)

        # the last word of the return type selects the default return doc;
        # an empty return type is passed on as ""
        ret_words = self.get_text(ret_type, escape=False).split()
        ret = self.default_return_doc(ret_words[-1] if ret_words else "")
        # an explicit "return" section overrides the default
        for n in parent.find_all("simplesect"):
            if n['kind'] == "return":
                ret = self.parse_para(n.find('para'), ref_dict)
                break
        return (self.todo_if_empty(details.strip()), params_str, self.todo_if_empty(ret.strip()))

    def parse_prototype(self, parent, escape=True):
        """
        Extract a function prototype from a doxygen member, in the form
        "[static ][inline ]<return type> <name>".
        """

        inline = parent["inline"] == "yes"
        static = parent["static"] == "yes"
        ret_type = self.get_text(parent.find("type"), escape)
        name = self.get_text(parent.find("name"), escape)

        output = "%s %s" % (ret_type, name)
        if inline:
            output = "inline " + output
        if static:
            output = "static " + output

        return output

    def build_ref_dict(self, soup):
        """
        Return a dict mapping reference ids and reference names
        to details about the referee.

        Each 'memberdef' is entered twice, under its id and under its
        name; both keys share the same details dict.
        """

        ret = {}
        for member in soup.find_all("memberdef"):
            name = str(member.find('name').string)
            label = member.manual['label']
            heading = member.manual['name']
            ref_id = member['id']
            data = {
                "name": self.text_escape(name),
                "original_name" : name,
                "label": label,
                "ref": ref_id,
                "heading": heading,
            }

            ret[ref_id] = data
            ret[name] = data

        return ret

    def generate_param_string(self, param_info, param_name):
        """Format a single parameter; overridden by subclasses."""
        return ""

    def generate_empty_param_string(self):
        """Format the parameters of a function without any; overridden by subclasses."""
        return ""

    def generate_api_doc(self, level, member, params, ret, details):
        """Format the documentation of one member; overridden by subclasses."""
        return ""

    def todo_if_empty(self, s):
        """
        Returns its argument if its argument is non-none and non-empty,
        otherwise returns "TODO"
        """
        return s if s else "TODO"
253
class LatexGenerator(Generator):
    """
    Generator subclass that renders doxygen xml as LaTeX, using the
    macros (\\apidoc, \\param, \\apifunc, ...) seen in the strings below.
    """

    # Characters that need an escape sequence in latex output
    ESCAPE_PATTERNS = {
        "_": "\\_",
    }

    def get_parse_table(self):
        """Return the base parse table with the latex handlers swapped in."""
        table = super(LatexGenerator, self).get_parse_table()
        table.update({
            'computeroutput': lambda p, r: '\\texttt{%s}' % self.get_text(p),
            'texttt': lambda p, r: '\\texttt{%s}' % self.get_text(p['text']),
            'shortref': lambda p, r: "\\ref{sec:%s}" % p['sec'],
            'obj': lambda p, r: "\\obj{%s}" % p['name'],
            'errorenumdesc': lambda p, r: "\\errorenumdesc",
            'listitem': lambda p, r: "\\item " + self.parse_para(p.para, r) + "\n",
            'autoref': lambda p, r: "\\autoref{%s}" % p['label'],
        })
        return table

    def default_return_doc(self, ret_type):
        """
        Return the latex describing the return value implied by
        ret_type: the no-return macro for "void", nothing otherwise.
        """
        return "\\noret" if ret_type == "void" else ""

    def ref_format(self, refid, ref_dict):
        """Format the ref_dict entry stored under refid as an \\apifunc reference."""
        return "\\apifunc{%(name)s}{%(label)s}" % ref_dict[refid]

    def parse_list(self, para, ref_dict, tag):
        """Wrap the parsed children of para in a latex environment named tag."""
        pieces = ['\\begin{%s}\n' % tag]
        for child in para.contents:
            pieces.append(self.parse_para(child, ref_dict))
        pieces.append('\\end{%s}\n' % tag)
        return "".join(pieces)

    def todo_if_empty(self, s):
        """Return s, or the latex todo macro when s is empty or None."""
        return s or "\\todo"

    def generate_param_string(self, param_info, param_name):
        """Format one parameter as a \\param{type}{name}{description} line."""
        type_text = self.get_text(param_info["type"])
        name_text = self.get_text(param_name)
        desc_text = self.todo_if_empty(param_info.get("desc", "").strip())
        return "\\param{%(type)s}{%(name)s}{%(desc)s}\n" % {
            "type": type_text,
            "name": name_text,
            "desc": desc_text,
        }

    def generate_empty_param_string(self):
        """Return the \\param entry used for a function without parameters."""
        return "\\param{void}{}{}"

    def generate_api_doc(self, level, member, params, ret, details):
        """
        Format one member as an \\apidoc entry. Label and name come from
        the member's 'manual' child; brief and prototype are parsed from
        the member; params, ret and details are inserted as given.
        """
        manual = member.manual
        fields = {
            "level": self.level_to_header(level),
            "label": manual["label"],
            "name": self.text_escape(manual["name"]),
            "brief": self.todo_if_empty(self.parse_brief(member)),
            "prototype": self.parse_prototype(member),
            "params": params,
            "ret": ret,
            "details": details,
        }
        return """
\\apidoc
[{%(level)s}]
{%(label)s}
{%(name)s}
{%(brief)s}
{%(prototype)s}
{%(params)s}
{%(ret)s}
{%(details)s}
        """ % fields

    def level_to_header(self, level):
        """Map a nesting level to a latex sectioning name; anything other than 0-3 is a paragraph."""
        headers = ('chapter', 'section', 'subsection', 'subsubsection')
        for depth, header in enumerate(headers):
            if level == depth:
                return header
        return 'paragraph'

    def level_to_heading(self, level, name):
        """Return the latex sectioning command for name at the given level."""
        return "".join(('\\', self.level_to_header(level), '{', self.text_escape(name), '}'))

    def gen_label(self, label):
        """Return a \\label command for label, followed by a newline."""
        return "".join(('\\label{', label, '}\n'))
353
class MarkdownGenerator(Generator):
    """
    A class that represents the generator for Doxygen to Markdown. A child of the Generator class
    """

    # Dict mapping characters to their escape sequence in markdown.
    # Backslashes are doubled so that no invalid string escape is used;
    # every value is still a backslash followed by the character.
    ESCAPE_PATTERNS = {
        "`" : "\\`",
        "#" : "\\#",
        "_" : "\\_",
        "*" : "\\*",
        "[" : "\\[",
        "]" : "\\]",
        "-" : "\\-",
        "+" : "\\+",
        "!" : "\\!",
    }

    def get_parse_table(self):
        """Return the base parse table with the markdown handlers swapped in."""
        parse_table = super(MarkdownGenerator, self).get_parse_table()
        # text inside a code span is taken literally, so it is not escaped
        parse_table['computeroutput'] = lambda p, r: '`%s`' % self.get_text(p, escape=False)
        parse_table['texttt'] = lambda p, r: '`%s`' % self.get_text(p['text'], escape=False)
        parse_table['obj'] = lambda p, r: '**%s**' % p['name']
        parse_table['errorenumdesc'] = lambda p, r: '%s' % self.get_error_num_description()
        parse_table['listitem'] = lambda p, r: self.parse_para(p.para, r) + "\n\n"
        parse_table['autoref'] = lambda p, r: "autoref[%s]" % p['label']
        # placeholder that generate_api_doc strips from the final output
        parse_table['docref'] = lambda p, r: "DOCREF"
        return parse_table


    def default_return_doc(self, ret_type):
        """
        Returns the description for the return value of a function
        implied by its return type
        """

        if ret_type == "void":
            return "This method does not return anything."
        return ""

    def ref_format(self, refid, ref_dict):
        """
        Lookup refid in ref_dict and output the formatted Markdown reference
        Creates a Markdown link whose anchor is the lower-cased heading
        with spaces replaced by dashes
        """

        ref = ref_dict[refid]
        ref_anchor = (ref['heading'].lower()).replace(" ", "-")
        return "[`%s`](#%s)" % (ref['original_name'], ref_anchor)

    def get_error_num_description(self):
        """Return the text substituted for an 'errorenumdesc' element."""
        return "A return value of `0` indicates success. A non-zero value indicates that an error occurred."

    def generate_itemize_list(self, para, ref_dict, output):
        """
        Returns output with a Markdown item list appended. Children of
        para that parse to nothing but whitespace are skipped.
        """

        for item in para.contents:
            parsed_item = self.parse_para(item, ref_dict)
            if parsed_item.rstrip():
                output += "* %s" % parsed_item
        return output

    def generate_enumerate_list(self, para, ref_dict, output):
        """
        Returns output with a Markdown number list appended. Children of
        para that parse to nothing but whitespace are skipped. The number
        of an item is its index in para.contents, so skipped children
        still use up a number.
        """

        # enumerate works on python 2 and 3 alike
        for num, item in enumerate(para.contents):
            parsed_item = self.parse_para(item, ref_dict)
            if parsed_item.rstrip():
                output += "%d. %s" % (num, parsed_item)
        return output

    def parse_list(self, para, ref_dict, tag):
        """
        Parse an ordered ('enumerate') or itemized ('itemize') list element.

        Raises ValueError for any other tag.
        """

        if tag == "enumerate":
            list_generator = self.generate_enumerate_list
        elif tag == "itemize":
            list_generator = self.generate_itemize_list
        else:
            raise ValueError("unsupported list tag: %r" % (tag,))
        # The list generators return their output argument with the items
        # appended, so the result starts with two newlines.
        output = '\n'
        output += list_generator(para, ref_dict, output)
        return output

    def todo_if_empty(self, s):
        """Return s, or an emphasised TODO when s is empty or None."""
        return s if s else "*TODO*"

    def generate_params(self, param_string):
        """
        Returns the params in a formatted Markdown table, or an empty
        string when there are no params
        """

        if param_string:
            return """
Type | Name | Description
--- | --- | ---
%s
            """ % param_string
        return ""

    def generate_param_string(self, param_info, param_name):
        """Format one parameter as a row of the table built by generate_params."""
        return "`%(type)s` | `%(name)s` | %(desc)s\n" % {
                    "type": self.get_text(param_info["type"], escape=False),
                    "name": self.get_text(param_name, escape=False),
                    "desc": self.todo_if_empty(param_info.get("desc", "").strip()),
        }

    def generate_api_doc(self, level, member, params, ret, details):
        """
        Format one member as a Markdown section: heading, prototype,
        brief description, parameter table, return value and description.
        """
        manual_node = member.manual

        # Descriptions that just contain a document reference are removed.
        # Found by the 'DOCREF' symbol
        match_details = re.match( r'^DOCREF$', details, re.M|re.I)
        if match_details:
            details_string = ""
        else:
            # single newlines become spaces, the first newline of a run is
            # the only one replaced
            details_string = "**Description:** " + re.sub(r"\n(?!\n)", " ", details)

        ret_string = "**Return value:** " + re.sub("\n(?!\n)", " ", ret)

        # Removed any DOCREF symbols from the return, details and param strings
        ret_string = re.sub(r'DOCREF', "", ret_string)
        details_string = re.sub(r'DOCREF', "", details_string)
        params_string = re.sub(r'DOCREF', "", params)

        return """
%(hash)s %(name)s
`%(prototype)s`

%(brief)s
%(params)s
%(ret)s

%(details)s
""" % {
                "hash": self.level_to_header(level),
                "name": self.text_escape(manual_node["name"]),
                "label": manual_node["label"],
                "brief": self.todo_if_empty(self.parse_brief(member)),
                "prototype": self.parse_prototype(member, escape=False),
                "params": self.generate_params(params_string),
                "ret": ret_string,
                "details": details_string,
        }

    def level_to_header(self, level):
        """Return the run of '#' that opens a heading at the given level (level 0 is '#')."""
        return (level + 1) * '#'

    def level_to_heading(self, level, name):
        """Return a Markdown heading line for name at the given level."""
        return self.level_to_header(level) + ' ' + self.text_escape(name) + '\n'

    def gen_label(self, label):
        """Markdown output carries no explicit labels, so this is always empty."""
        return ''
503
def generate_general_syscall_doc(generator, input_file_name, level):
    """
    Takes a path to a file containing doxygen-generated xml,
    and return a string containing the documentation in the format
    produced by generator.

    Nested groups ('innergroup' elements) are read from '<refid>.xml'
    next to the input file and rendered recursively one level deeper.
    Returns "No methods." if the file yields neither text nor members.
    """

    dir_name = os.path.dirname(input_file_name)
    # The whole file is parsed up front, so it does not have to stay open
    # while nested groups are processed recursively.
    with open(input_file_name, "r") as f:
        soup = BeautifulSoup(f, "lxml")

    ref_dict = generator.build_ref_dict(soup)
    elements = soup.find_all("memberdef")
    summary = soup.find('compounddef')

    chunks = []
    # parse any top level descriptions
    for ddesc in summary.find_all('detaileddescription', recursive=False):
        if ddesc.para:
            chunks.append(generator.parse_para(ddesc.para))

    # parse any nested groups
    for inner_group in soup.find_all("innergroup"):
        new_input_file_name = inner_group["refid"] + '.xml'
        new_input_file = os.path.join(dir_name, new_input_file_name)
        chunks.append(generator.level_to_heading(level, inner_group.text))
        chunks.append(generator.gen_label(inner_group["refid"]))
        chunks.append(generate_general_syscall_doc(generator, new_input_file, level + 1))

    output = "".join(chunks)

    # parse all of the function definitions
    if not elements and output == "":
        return "No methods."

    member_docs = []
    for member in elements:
        details, params, ret = generator.parse_detailed_desc(member, ref_dict)
        member_docs.append(generator.generate_api_doc(level, member, params, ret, details))
    return output + "".join(member_docs)
540
def process_args():
    """
    Build the argument parser of the script.

    Returns the argparse.ArgumentParser; the caller runs parse_args().
    --input and --output are required, since the script cannot do
    anything without them; --format defaults to "latex" and --level
    has no default.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("-f", "--format", choices=["latex", "markdown"],
                        default="latex", help="Format of doxygen output")

    # Without these two the script used to fail with a TypeError traceback;
    # let argparse report the missing argument instead.
    parser.add_argument("-i", "--input", dest="input", type=str, required=True,
                        help="File containing doxygen-generated xml.")
    parser.add_argument("-o", "--output", dest="output", type=str, required=True,
                        help="Output latex file.")

    parser.add_argument("-l", "--level", type=int,
                        help="Level for each method, 0 = top level")

    return parser
557
def main():
    """
    Convert doxygen xml into the seL4 API manual format selected on the
    command line (LaTeX or Markdown) and write it to the output file.
    """
    args = process_args().parse_args()

    # dirname is '' when the output is a bare file name in the current
    # directory; there is nothing to create then, and os.makedirs('')
    # would raise.
    output_dir = os.path.dirname(args.output)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # argparse restricts the format to these two choices
    if args.format == "latex":
        generator = LatexGenerator()
    elif args.format == "markdown":
        generator = MarkdownGenerator()

    output_str = generate_general_syscall_doc(generator, args.input, args.level)

    with open(args.output, "w") as output_file:
        output_file.write(output_str)
574
# Run the conversion only when executed as a script, and hand the
# result of main() to the interpreter as the exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
577