1#!/usr/bin/env python3
2#
3# Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
4#
5# SPDX-License-Identifier: BSD-2-Clause
6#
7
8"""
9Script for generating latex from doxygen-generated xml files.
The generated latex files are compatible with the seL4 manual.
11"""
12import argparse
13import sys
14import os
15import re
16from bs4 import BeautifulSoup
17import six
18
19
class Generator(object):
    """
    Base class for converting doxygen-generated xml into an output format.

    The formatting hooks defined here return empty or unformatted strings;
    subclasses override them (and ESCAPE_PATTERNS) to emit a concrete format.
    """

    # Dict mapping characters to their escape sequence in the output format
    ESCAPE_PATTERNS = {}

    def get_parse_table(self):
        """
        Return a dict mapping the names of xml elements that may appear
        inside a 'para' element to a handler. Each handler takes the node
        and the reference dict and returns the rendered string.
        """
        # table of translations of xml children of 'para' elements
        parse_table = {
            'para': self.parse_recurse,
            'computeroutput': lambda p, r: '%s' % self.get_text(p),
            'texttt': lambda p, r: '%s' % self.get_text(p['text']),
            'ref': self.ref_to_format,
            'nameref': self.nref_to_format,
            'shortref': lambda p, r: "%s" % p['sec'],
            'obj': lambda p, r: "%s" % p['name'],
            'errorenumdesc': lambda p, r: "",
            'orderedlist': self.parse_ordered_list,
            'listitem': lambda p, r: self.parse_para(p.para, r),
            'itemizedlist': self.parse_itemized_list,
            'autoref': lambda p, r: "%s" % p['label'],
            'docref': self.parse_recurse
        }
        return parse_table

    def default_return_doc(self, ret_type):
        """
        Returns the doc for the return value of a function
        implied by its return type
        """

        return ""

    def text_escape(self, string):
        """
        Return a string with every character listed in ESCAPE_PATTERNS
        replaced by its escape sequence.
        """
        if not self.ESCAPE_PATTERNS:
            # An empty alternation matches the empty string at every
            # position, which has no entry in ESCAPE_PATTERNS.
            return string
        # Escape each key on its own so that '|' stays an alternation operator
        escape_regex = re.compile('|'.join(re.escape(key) for key in self.ESCAPE_PATTERNS))
        return escape_regex.sub(lambda match: self.ESCAPE_PATTERNS[match.group()], string)

    def get_text(self, soup, escape=True):
        """
        Return a string containing a concatenation of a nodes text node
        children, recursing into non-text nodes or escaping special
        characters if necessary.

        soup may be a string or a parsed xml node. When escape is true the
        text is passed through text_escape before being returned.
        """

        if isinstance(soup, str):
            string = soup
        elif isinstance(soup, six.string_types):
            string = str(soup)
        elif soup.string:
            string = str(soup.string)
        else:
            string = soup.get_text()

        if string is not None:
            if escape:
                return self.text_escape(string)
            else:
                return string

    def ref_format(self, refid, ref_dict):
        """Lookup refid in ref_dict and output the api function reference"""
        return ""

    def ref_to_format(self, para, ref_dict):
        """Convert a reference by id to a formatted reference by looking up refid in para"""
        if len(ref_dict) > 0:
            return self.ref_format(para["refid"], ref_dict)
        return ""

    def nref_to_format(self, para, ref_dict):
        """Convert a reference by name to a formatted reference by looking up name in para"""
        if len(ref_dict) > 0:
            return self.ref_format(para["name"], ref_dict)
        return ""

    def parse_list(self, para, ref_dict, tag):
        """Render a list element; tag is 'enumerate' or 'itemize'"""
        return ""

    def parse_ordered_list(self, para, ref_dict):
        """orderedlist --> enumerate"""
        return self.parse_list(para, ref_dict, 'enumerate')

    def parse_itemized_list(self, para, ref_dict):
        """itemizedlist --> itemize"""
        return self.parse_list(para, ref_dict, 'itemize')

    def parse_recurse(self, para, ref_dict):
        """Recursively parse a para element"""
        # recurse on the contents
        output = ""
        for item in para.contents:
            output += self.parse_para(item, ref_dict)
        return output

    def parse_para(self, para_node, ref_dict=None):
        """
        Parse a paragraph node, handling special doxygen node types
        that may appear inside a paragraph. Unhandled cases are
        not parsed and result in an empty string.

        ref_dict defaults to an empty dict, in which case references
        render as empty strings.
        """
        if ref_dict is None:
            ref_dict = {}
        parse_table = self.get_parse_table()
        if para_node.name is None:
            # text nodes have no element name
            return self.get_text(para_node, escape=True)
        elif para_node.name in parse_table:
            return parse_table[para_node.name](para_node, ref_dict)
        else:
            return ""

    def parse_brief(self, parent):
        """
        Parse the "brief description" section of a doxygen member.
        """
        para_nodes = parent.find('briefdescription').find_all('para')
        return "\n\n".join([self.parse_para(n) for n in para_nodes])

    def parse_detailed_desc(self, parent, ref_dict):
        """
        Parse the "detailed description" section of a doxygen member.

        Returns a tuple (details, params_str, ret) of rendered strings.
        Empty details and return descriptions are replaced by the
        todo_if_empty marker.

        Raises KeyError if a documented parameter name is not among the
        declared, non-void parameters.
        """
        # parse the function parameters
        params = {}
        param_order = []
        types_iter = iter(parent.find_all('type'))
        names = parent.find_all('declname')

        # the first type is the return type
        ret_type = next(types_iter)

        # the rest are parameters
        for n in names:
            param_type = next(types_iter).text
            if param_type == "void":
                continue
            params[str(n.text)] = {"type": param_type}
            param_order.append(str(n.text))

        param_items = parent.find_all("parameteritem")
        for param_item in param_items:
            param_name_node = param_item.find("parametername")
            param_desc_node = param_item.find("parameterdescription")

            param_name = self.get_text(param_name_node, escape=False)
            param_desc = self.parse_para(param_desc_node.find('para'), ref_dict)

            params[param_name]["desc"] = param_desc

        if len(params) == 0:
            params_str = self.generate_empty_param_string()
        else:
            params_str = ""
            # emit parameters in declaration order
            for param_name in param_order:
                param_info = params[param_name]
                params_str += self.generate_param_string(param_info, param_name)

        details = ""
        for n in parent.detaileddescription.find_all('para', recursive=False):
            # paragraphs holding the parameter list were handled above
            if not n.parameterlist:
                details += self.parse_para(n, ref_dict)
                details += "\n\n"

        ret_str = self.get_text(ret_type, escape=False)
        # the last word of the return type selects the default return doc
        ret = self.default_return_doc(ret_str.split()[-1])
        simplesects = parent.find_all("simplesect")
        for n in simplesects:
            if n['kind'] == "return":
                ret = self.parse_para(n.find('para'), ref_dict)
                break
        return (self.todo_if_empty(details.strip()), params_str, self.todo_if_empty(ret.strip()))

    def parse_prototype(self, parent, escape=True):
        """
        Extract a function prototype from a doxygen member.

        The result is "<return type> <name>", prefixed with "inline"
        and/or "static" according to the member's attributes.
        """

        inline = parent["inline"] == "yes"
        static = parent["static"] == "yes"
        ret_type = self.get_text(parent.find("type"), escape)
        name = self.get_text(parent.find("name"), escape)

        output = "%s %s" % (ret_type, name)
        if inline:
            output = "inline " + output
        if static:
            output = "static " + output

        return output

    def build_ref_dict(self, soup):
        """
        Return a dict mapping reference ids and reference names
        to details about the referee.
        """

        ret = {}
        for member in soup.find_all("memberdef"):
            name = str(member.find('name').string)
            label = member.manual['label']
            heading = member.manual['name']
            ref_id = member['id']
            data = {
                "name": self.text_escape(name),
                "original_name": name,
                "label": label,
                "ref": ref_id,
                "heading": heading,
            }

            # the same record is reachable by id and by name
            ret[ref_id] = data
            ret[name] = data

        return ret

    def generate_param_string(self, param_info, param_name):
        """Render one parameter; param_info holds its 'type' and optional 'desc'"""
        return ""

    def generate_empty_param_string(self):
        """Render the parameter section of a function without parameters"""
        return ""

    def generate_api_doc(self, level, member, params, ret, details):
        """Render the complete documentation entry of one member"""
        return ""

    def todo_if_empty(self, s):
        """
        Returns its argument if its argument is non-none and non-empty,
        otherwise returns "TODO"
        """
        return s if s else "TODO"
248
249
class LatexGenerator(Generator):
    """
    Generator producing latex from doxygen xml. A child of the Generator class.
    """

    # Characters that need escaping in latex, with their escape sequence
    ESCAPE_PATTERNS = {
        "_": "\\_",
    }

    def get_parse_table(self):
        """Return the inherited parse table with the latex handlers substituted"""
        table = super(LatexGenerator, self).get_parse_table()
        table.update({
            'computeroutput': lambda p, r: '\\texttt{%s}' % self.get_text(p),
            'texttt': lambda p, r: '\\texttt{%s}' % self.get_text(p['text']),
            'shortref': lambda p, r: "\\ref{sec:%s}" % p['sec'],
            'obj': lambda p, r: "\\obj{%s}" % p['name'],
            'errorenumdesc': lambda p, r: "\\errorenumdesc",
            'listitem': lambda p, r: "\\item " + self.parse_para(p.para, r) + "\n",
            'autoref': lambda p, r: "\\autoref{%s}" % p['label'],
        })
        return table

    def default_return_doc(self, ret_type):
        """
        Return the latex describing the return value that follows from the
        return type alone: the no-return macro for void, otherwise nothing.
        """
        return "\\noret" if ret_type == "void" else ""

    def ref_format(self, refid, ref_dict):
        """Return the latex api function reference for the ref_dict entry of refid"""
        return "\\apifunc{%(name)s}{%(label)s}" % ref_dict[refid]

    def parse_list(self, para, ref_dict, tag):
        """Render the children of a list element inside a latex 'tag' environment"""
        opening = '\\begin{%s}\n' % tag
        body = "".join(self.parse_para(child, ref_dict) for child in para.contents)
        closing = '\\end{%s}\n' % tag
        return opening + body + closing

    def todo_if_empty(self, s):
        """Return s unless it is empty or None, in which case return the todo macro"""
        return s or "\\todo"

    def generate_param_string(self, param_info, param_name):
        """Return the latex param macro for one parameter"""
        fields = dict(
            type=self.get_text(param_info["type"]),
            name=self.get_text(param_name),
            desc=self.todo_if_empty(param_info.get("desc", "").strip()),
        )
        return "\\param{%(type)s}{%(name)s}{%(desc)s}\n" % fields

    def generate_empty_param_string(self):
        """Return the latex param macro used for functions without parameters"""
        return "\\param{void}{}{}"

    def generate_api_doc(self, level, member, params, ret, details):
        """
        Return the latex apidoc entry for member, combining its manual
        label and name, brief description and prototype with the already
        rendered params, ret and details strings.
        """
        manual = member.manual
        fields = {
            "level": self.level_to_header(level),
            "label": manual["label"],
            "name": self.text_escape(manual["name"]),
            "brief": self.todo_if_empty(self.parse_brief(member)),
            "prototype": self.parse_prototype(member),
            "params": params,
            "ret": ret,
            "details": details,
        }
        return """
\\apidoc
[{%(level)s}]
{%(label)s}
{%(name)s}
{%(brief)s}
{%(prototype)s}
{%(params)s}
{%(ret)s}
{%(details)s}
        """ % fields

    def level_to_header(self, level):
        """Return the latex sectioning command name for a nesting level"""
        headers = {
            0: 'chapter',
            1: 'section',
            2: 'subsection',
            3: 'subsubsection',
        }
        # every other level falls back to 'paragraph'
        return headers.get(level, 'paragraph')

    def level_to_heading(self, level, name):
        """Return the latex heading command for name at the given level"""
        return ''.join(('\\', self.level_to_header(level), '{', self.text_escape(name), '}'))

    def gen_label(self, label):
        """Return the latex label command for label, followed by a newline"""
        return ''.join(('\\label{', label, '}\n'))
349
350
class MarkdownGenerator(Generator):
    """
    A class that represents the generator for Doxygen to Markdown. A child of the Generator class
    """

    # Dict mapping characters to their escape sequence in markdown.
    # Backslashes are doubled: sequences such as "\#" are not valid
    # string escapes.
    ESCAPE_PATTERNS = {
        "`": "\\`",
        "#": "\\#",
        "_": "\\_",
        "*": "\\*",
        "[": "\\[",
        "]": "\\]",
        "-": "\\-",
        "+": "\\+",
        "!": "\\!",
    }

    def get_parse_table(self):
        """Return the inherited parse table with the markdown handlers substituted"""
        parse_table = super(MarkdownGenerator, self).get_parse_table()
        # text inside code spans is not escaped
        parse_table['computeroutput'] = lambda p, r: '`%s`' % self.get_text(p, escape=False)
        parse_table['texttt'] = lambda p, r: '`%s`' % self.get_text(p['text'], escape=False)
        parse_table['obj'] = lambda p, r: '**%s**' % p['name']
        parse_table['errorenumdesc'] = lambda p, r: '%s' % self.get_error_num_description()
        parse_table['listitem'] = lambda p, r: self.parse_para(p.para, r) + "\n\n"
        parse_table['autoref'] = lambda p, r: "autoref[%s]" % p['label']
        # placeholder that generate_api_doc strips from the output
        parse_table['docref'] = lambda p, r: "DOCREF"
        return parse_table

    def default_return_doc(self, ret_type):
        """
        Returns the description for the return value of a function
        implied by its return type
        """

        if ret_type == "void":
            return "This method does not return anything."
        return ""

    def ref_format(self, refid, ref_dict):
        """
        Lookup refid in ref_dict and output the formatted Markdown reference
        Creates a Markdown link
        """

        ref = ref_dict[refid]
        # the anchor is the lower-cased heading with spaces turned into dashes
        ref_anchor = (ref['heading'].lower()).replace(" ", "-")
        return "[`%s`](#%s)" % (ref['original_name'], ref_anchor)

    def get_error_num_description(self):
        """Return the generic description of an error-number return value"""
        return "A return value of `0` indicates success. A non-zero value indicates that an error occurred."

    def generate_itemize_list(self, para, ref_dict, output):
        """
        Returns output with a Markdown item list appended.
        Children that render to nothing but whitespace are skipped.
        """

        for item in para.contents:
            parsed_item = self.parse_para(item, ref_dict)
            if parsed_item.rstrip():
                output += "* %s" % parsed_item
        return output

    def generate_enumerate_list(self, para, ref_dict, output):
        """
        Returns output with a Markdown number list appended.
        Children that render to nothing but whitespace are skipped.
        """

        # Count only emitted items, starting at 1, so that skipped
        # whitespace nodes do not leave gaps in the numbering.
        number = 0
        for item in para.contents:
            parsed_item = self.parse_para(item, ref_dict)
            if parsed_item.rstrip():
                number += 1
                output += "%d. %s" % (number, parsed_item)
        return output

    def parse_list(self, para, ref_dict, tag):
        """
        Parse a list element; tag is 'enumerate' for a numbered list
        or 'itemize' for a bullet list.

        Raises ValueError for any other tag.
        """

        if tag == "enumerate":
            list_generator = self.generate_enumerate_list
        elif tag == "itemize":
            list_generator = self.generate_itemize_list
        else:
            raise ValueError("unsupported list type: %r" % (tag,))
        # The generator appends to the prefix it is handed and the result is
        # appended to the prefix again, so the list starts with two newlines.
        output = '\n'
        output += list_generator(para, ref_dict, output)
        return output

    def todo_if_empty(self, s):
        """Return s unless it is empty or None, in which case return the todo marker"""
        return s if s else "*TODO*"

    def generate_params(self, param_string):
        """
        Returns the params in a formatted Markdown table
        """

        if param_string:
            return """
Type | Name | Description
--- | --- | ---
%s
            """ % param_string
        return ""

    def generate_param_string(self, param_info, param_name):
        """Return the Markdown table row for one parameter"""
        return "`%(type)s` | `%(name)s` | %(desc)s\n" % {
            "type": self.get_text(param_info["type"], escape=False),
            "name": self.get_text(param_name, escape=False),
            "desc": self.todo_if_empty(param_info.get("desc", "").strip()),
        }

    def generate_api_doc(self, level, member, params, ret, details):
        """
        Return the Markdown entry for member: a heading, the prototype,
        the brief description, the parameter table, the return value and
        the description.
        """
        manual_node = member.manual

        # Descriptions that just contain a document reference are removed.
        # Found by the 'DOCREF' symbol
        match_details = re.match(r'^DOCREF$', details, re.M | re.I)
        if match_details:
            details_string = ""
        else:
            # single newlines become spaces; the second of a pair is kept
            details_string = "**Description:** " + re.sub(r"\n(?!\n)", " ", details)

        ret_string = "**Return value:** " + re.sub(r"\n(?!\n)", " ", ret)

        # Removed any DOCREF symbols from the return, details and param strings
        ret_string = re.sub(r'DOCREF', "", ret_string)
        details_string = re.sub(r'DOCREF', "", details_string)
        params_string = re.sub(r'DOCREF', "", params)

        return """
%(hash)s %(name)s
`%(prototype)s`

%(brief)s
%(params)s
%(ret)s

%(details)s
""" % {
            "hash": self.level_to_header(level),
            "name": self.text_escape(manual_node["name"]),
            "label": manual_node["label"],
            "brief": self.todo_if_empty(self.parse_brief(member)),
            "prototype": self.parse_prototype(member, escape=False),
            "params": self.generate_params(params_string),
            "ret": ret_string,
            "details": details_string,
        }

    def level_to_header(self, level):
        """Return the Markdown heading marker: level + 1 hash characters"""
        return (level + 1) * '#'

    def level_to_heading(self, level, name):
        """Return the Markdown heading line for name at the given level"""
        return self.level_to_header(level) + ' ' + self.text_escape(name) + '\n'

    def gen_label(self, label):
        """Return an empty string; no label is emitted for Markdown"""
        return ''
499
500
def generate_general_syscall_doc(generator, input_file_name, level, ref_dict):
    """
    Takes a path to a file containing doxygen-generated xml,
    and return a string in the generator's output format suitable
    for inclusion in the sel4 manual.

    generator renders the individual elements, level is the heading level
    of this file's members, and ref_dict is passed on when rendering member
    descriptions. Nested groups are read from '<refid>.xml' files next to
    the input file and rendered one level deeper.

    Returns "No methods." when the file has neither members nor any
    other output.
    """

    dir_name = os.path.dirname(input_file_name)
    # Parse up front so the file is closed before recursing into nested
    # groups, rather than holding one open handle per nesting level.
    with open(input_file_name, "r") as f:
        soup = BeautifulSoup(f, "lxml")

    output = ""
    elements = soup.find_all("memberdef")
    summary = soup.find('compounddef')
    # parse any top level descriptions
    for ddesc in summary.find_all('detaileddescription', recursive=False):
        if ddesc.para:
            output += generator.parse_para(ddesc.para)

    # parse any nested groups
    for inner_group in soup.find_all("innergroup"):
        new_input_file_name = inner_group["refid"] + '.xml'
        new_input_file = os.path.join(dir_name, new_input_file_name)
        output += generator.level_to_heading(level, inner_group.text)
        output += generator.gen_label(inner_group["refid"])
        output += generate_general_syscall_doc(generator, new_input_file, level + 1, ref_dict)

    # parse all of the function definitions
    if len(elements) == 0 and output == "":
        return "No methods."

    for member in elements:
        details, params, ret = generator.parse_detailed_desc(member, ref_dict)
        output += generator.generate_api_doc(level, member, params, ret, details)
    return output
536
537
def process_args():
    """
    Build the command line parser for this script.

    Returns the argparse.ArgumentParser; the caller invokes parse_args().
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("-f", "--format", choices=["latex", "markdown"],
                        default="latex", help="Format of doxygen output")

    # Both paths are required: main() takes the directory of each, so a
    # missing one would fail with a traceback instead of a usage message.
    parser.add_argument("-i", "--input", dest="input", type=str, required=True,
                        help="File containing doxygen-generated xml.")
    parser.add_argument("-o", "--output", dest="output", type=str, required=True,
                        help="Output file in the selected format.")

    parser.add_argument("-l", "--level", type=int,
                        help="Level for each method, 0 = top level")

    return parser
554
555
def main():
    """
    Convert doxygen xml into the seL4 API manual format selected on the
    command line and write the result to the output file.
    """
    args = process_args().parse_args()

    # dirname is '' for a bare file name; there is no directory to create then
    output_dir = os.path.dirname(args.output)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if args.format == "markdown":
        generator = MarkdownGenerator()
    else:
        # the parser restricts the format to 'latex' or 'markdown'
        generator = LatexGenerator()

    # dirname is '' for a bare file name; walk the current directory instead,
    # since walking '' yields nothing and would leave the refdict empty
    dir_name = os.path.dirname(args.input) or os.curdir

    # create the refdict from all the group__*SystemCalls.xml files
    ref_dict = {}
    for (root, _dirs, files) in os.walk(dir_name):
        for file_name in files:
            if "SystemCalls" not in file_name:
                continue
            # join with the directory being walked so that files found in
            # subdirectories resolve to their real location
            with open(os.path.join(root, file_name), "r") as source:
                soup = BeautifulSoup(source, "lxml")
                ref_dict.update(generator.build_ref_dict(soup))

    output_str = generate_general_syscall_doc(generator, args.input, args.level, ref_dict)

    with open(args.output, "w") as output_file:
        output_file.write(output_str)
584
585
# Run the conversion only when executed as a script; the return value of
# main() is passed to sys.exit as the exit status.
if __name__ == "__main__":
    sys.exit(main())
588