1#!/usr/bin/python -u
2#
# This is the API builder: it parses the C sources and builds the
# formal description of the API in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
10import os, sys
11import string
12import glob
13
# Debugging switches.
#
# debug:    general debug flag, off (0) by default.
#           NOTE(review): its readers are outside this section of the file.
# debugsym: name of one symbol to trace; the identifier and index classes
#           print a line whenever they define or update a symbol with this
#           name.  None disables tracing.  Example value:
#           'ignorableWhitespaceSAXFunc'
debug = 0
debugsym = None
17
18#
19# C parser analysis code
20#
# Files to leave out, keyed by file name; each value is a short
# human-readable reason.
# NOTE(review): the code consulting this table is outside this section;
# presumably keys are matched against the file names being scanned.
ignored_files = {
    # trio portability sources
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",

    # generated or internal headers
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",

    # out of memory tester
    "testOOM.c": "out of memory tester",
    "testOOMlib.h": "out of memory tester",
    "testOOMlib.c": "out of memory tester",

    # code that is not integrated / not a regular header
    "rngparser.c": "not yet integrated",
    "rngparser.h": "not yet integrated",
    "elfgcchack.h": "not a normal header",

    # command line test tools
    "testHTML.c": "test tool",
    "testReader.c": "test tool",
    "testSchemas.c": "test tool",
    "testXPath.c": "test tool",
    "testAutomata.c": "test tool",
    "testModule.c": "test tool",
    "testRegexp.c": "test tool",
    "testThreads.c": "test tool",
    "testC14N.c": "test tool",
    "testRelax.c": "test tool",
    "testThreadsWin32.c": "test tool",
    "testSAX.c": "test tool",
    "testURI.c": "test tool",

    # regression test programs
    "testapi.c": "generated regression tests",
    "runtest.c": "regression tests program",
    "runsuite.c": "regression tests program",
    "tst.c": "not part of the library",
    "test.c": "not part of the library",
    "testdso.c": "test for dynamid shared libraries",
    "testrecurse.c": "test for entities recursions",

    # internal-only headers
    "xzlib.h": "Internal API only 2.8.0",
    "buf.h": "Internal API only 2.9.0",
    "enc.h": "Internal API only 2.9.0",
    "/save.h": "Internal API only 2.9.0",
    "timsort.h": "Internal header only for xpath.c 2.9.0",
}
61
# Words to ignore, keyed by the word itself.  Each value is a pair
# (count, reason) where reason is a short human-readable explanation.
# NOTE(review): the consumer of this table is outside this section; the
# integer is presumably the number of tokens to skip after the word
# (e.g. the parenthesised argument of __declspec) -- confirm in the parser.
ignored_words = {
    # Windows keywords and export/calling-convention macros
    "WINAPI": (0, "Windows keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
    "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XMLCALL": (0, "Special macro for win32 calls"),
    "XSLTCALL": (0, "Special macro for win32 calls"),
    "XMLCDECL": (0, "Special macro for win32 calls"),
    "EXSLTCALL": (0, "Special macro for win32 calls"),
    "__declspec": (3, "Windows keyword"),
    "__stdcall": (0, "Windows keyword"),

    # generic macro keywords
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
    "LIBEXSLT_PUBLIC": (0, "macro keyword"),
    "X_IN_Y": (5, "macro function builder"),

    # gcc checking extensions
    "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
    "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
    "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
    "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
}
85
def escape(raw):
    """Return raw with the five XML special characters escaped.

    '&', '<', '>', "'" and '"' are replaced by the predefined XML
    entities.  The ampersand has to be handled first, otherwise the '&'
    introduced by the other replacements would be escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
93
def uniq(items):
    """Return the distinct elements of items as a list.

    Elements are used as dictionary keys, so they must be hashable and
    duplicates collapse onto their first occurrence.
    """
    return list(dict.fromkeys(items, 1))
99
class identifier:
    """Record holding what is known about one named symbol.

    Attributes:
      name          the symbol name
      header        header the symbol was found in, or None
      module        module the symbol was found in, or None
      type          kind of symbol as a string (e.g. "function"), or None
      lineno        line number associated with the symbol
      info, extra   free-form payloads stored as given
      static        0 by default, changed through set_static()
      conditionals  private copy of the list given, or None when that
                    list is None or empty
    """
    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r

    #
    # Setters
    #
    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        # None and the empty list are both stored as None; otherwise keep a
        # copy so later changes to the caller's list do not leak in here.
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    #
    # Getters
    #
    def get_name(self):
        return self.name
    def get_header(self):
        # This used to return self.module by mistake.
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge new information into this record.

        header is stored only when none is recorded yet.  module is stored
        when none is recorded yet or when the recorded module equals the
        recorded header.  type is stored only when none is recorded yet.
        info, extra and conditionals replace the stored value whenever
        they are not None.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # This used to store 'module' in the header slot.
            self.set_header(header)
        if module is not None and (self.module is None or
                                   self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
190
class index:
    """Symbol tables collected while parsing.

    identifiers maps every recorded name to its identifier record; the
    functions, variables, includes, structs, enums, typedefs and macros
    tables hold the same records split by kind, and references holds the
    records registered through add_ref().
    """
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _record(self, name, header, module, static, type, lineno, info,
                extra, conditionals):
        """Return the record for name, updating it or creating it.

        A known name is updated in place through identifier.update().
        That call used to be made with an extra positional lineno
        argument: it always raised TypeError, which a bare except
        swallowed, so the existing record was silently replaced instead
        of updated.  update() takes no line number, so a known record
        keeps the lineno it was created with.
        """
        try:
            d = self.identifiers[name]
        except KeyError:
            d = identifier(name, header, module, type, lineno, info, extra,
                           conditionals)
            self.identifiers[name] = d
        else:
            d.update(header, module, type, info, extra, conditionals)
        if static == 1:
            d.set_static(1)
        return d

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to name and return its identifier record.

        Names starting with '__' are not recorded and None is returned.
        The record is entered in self.references when type is not None.
        """
        if name[0:2] == '__':
            return None
        d = self._record(name, header, module, static, type, lineno, info,
                         extra, conditionals)

        if type is not None:
            self.references[name] = d

        if name == debugsym:
            print("New ref: %s" % (d))

        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a definition of name and return its identifier record.

        Names starting with '__' are not recorded and None is returned.
        When type is not None the record is also entered in the table
        matching that type ("functype" shares the functions table); an
        unknown type is reported on stdout.
        """
        if name[0:2] == '__':
            return None
        d = self._record(name, header, module, static, type, lineno, info,
                         extra, conditionals)

        if type is not None:
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            table = tables.get(type)
            if table is None:
                print("Unable to register type ", type)
            else:
                table[name] = d

        if name == debugsym:
            print("New symbol: %s" % (d))

        return d

    def _merge_table(self, label, own, other, drop_macros = 0):
        """Copy the entries of other missing from own, reporting clashes.

        With drop_macros set, a macro of the same name is removed from
        self.macros first: macros might be used to override function or
        variable definitions.
        """
        for name in list(other.keys()):
            if drop_macros and name in self.macros:
                del self.macros[name]
            if name in own:
                print("%s %s from %s redeclared in %s" % (
                    label, name, own[name].header, other[name].header))
            else:
                own[name] = other[name]
                self.identifiers[name] = other[name]

    def merge(self, idx):
        """Merge the tables of idx into this index.

        Tables are handled in the order functions, variables, structs,
        typedefs, macros, enums.  An entry already present here is kept
        and the clash is reported on stdout.
        """
        self._merge_table("function", self.functions, idx.functions, 1)
        self._merge_table("variable", self.variables, idx.variables, 1)
        self._merge_table("struct", self.structs, idx.structs)
        self._merge_table("typedef", self.typedefs, idx.typedefs)
        for name in list(idx.macros.keys()):
            #
            # macro might be used to override functions or variables
            # definitions, such macros are not merged
            #
            if name in self.variables or name in self.functions or \
               name in self.enums:
                continue
            if name in self.macros:
                print("macro %s from %s redeclared in %s" % (
                    name, self.macros[name].header, idx.macros[name].header))
            else:
                self.macros[name] = idx.macros[name]
                self.identifiers[name] = idx.macros[name]
        self._merge_table("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Update the functions already known here from those of idx.

        Functions of idx that are not in this index are ignored.  A
        difference between the two sets of conditionals is reported on
        stdout before the update.
        """
        for name in list(idx.functions.keys()):
            if name not in self.functions:
                # Functions missing from this index are not reported.
                continue
            up = idx.functions[name]
            mine = self.functions[name]
            # check that function condition agrees with header
            if up.conditionals != mine.conditionals:
                print("Header condition differs from Function for %s:" \
                   % name)
                print("  H: %s" % mine.conditionals)
                print("  C: %s" % up.conditionals)
            mine.update(None, up.module, up.type, up.info, up.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries dict holds and how many are not static."""
        count = len(dict)
        public = 0
        for name in list(dict.keys()):
            if dict[name].static == 0:
                public = public + 1
        if count != public:
            print("  %d %s , %d public" % (count, type, public))
        elif count != 0:
            print("  %d public %s" % (count, type))


    def analyze(self):
        """Print the statistics of the main symbol tables."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
369
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line

       Tokens are (kind, text) pairs where kind is one of 'preproc',
       'string', 'comment', 'name', 'sep' or 'op'."""

    # characters ending a 'name' token
    _NAME_STOP = " \t(){}:;,+-*/%&!|[]=><"
    # single character 'sep' tokens
    _SEPARATORS = "(){}:;,[]"
    # characters starting an 'op' token, and those allowed as its second
    # character
    _OP_FIRST = "+-*><=/%&!|."
    _OP_SECOND = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # Last token returned.  It used to be left undefined until the
        # first token was read, which made debug() raise AttributeError.
        self.last = None

    def getline(self):
        """Return the next non blank line, stripped, or None at the end.

        Lines ending with a backslash are joined with the following ones.
        """
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put token back, it becomes the next one returned by token()."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token: ", self.last)
        print("Token queue: ", self.tokens)
        print("Line %d end: " % (self.lineno), self.line)

    def _read_string(self, line):
        """Read a quoted string starting at line[0].

        The text following the closing quote is saved in self.line.
        Returns a ('string', text) token, or None when the input ends
        before the closing quote.
        """
        quote = line[0]
        line = line[1:]
        text = ""
        while 1:
            closed = 0
            i = 0
            l = len(line)
            while i < l:
                if line[i] == quote:
                    self.line = line[i+1:]
                    line = line[:i]
                    closed = 1
                    break
                if line[i] == '\\':
                    # skip the escaped character
                    i = i + 1
                i = i + 1
            text = text + line
            if closed:
                return ('string', text)
            line = self.getline()
            if line is None:
                return None

    def _read_block_comment(self, line):
        """Read a /* */ comment, line being the text after the opening.

        The text following the terminator is saved in self.line.
        Returns a ('comment', text) token with the comment lines joined
        by newlines, or None when the input ends inside the comment.
        """
        text = ""
        while 1:
            end = line.find('*/')
            if end != -1:
                self.line = line[end+2:]
                # NOTE(review): the character preceding the terminator is
                # dropped as well (usually a blank).  When the terminator
                # starts the line this is line[:-1], i.e. everything but
                # the last character; kept as is since the comment parsers
                # were written against this output.
                line = line[:end-1]
            if text != "":
                text = text + "\n"
            text = text + line
            if end != -1:
                return ('comment', text)
            line = self.getline()
            if line is None:
                return None

    def _split_code(self, line):
        """Cut line where a comment or a string starts.

        Returns the code before that point; the remainder, if any, is
        saved in self.line for the next round.
        """
        for pos, c in enumerate(line):
            if c == '"' or c == "'" or \
               (c == '/' and line[pos+1:pos+2] in ('/', '*')):
                self.line = line[pos:]
                return line[:pos]
        return line

    def _tokenize(self, line):
        """Split a piece of code into 'name', 'sep' and 'op' tokens."""
        found = []
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i = i + 1
                continue
            if c in self._SEPARATORS:
                found.append(('sep', c))
                i = i + 1
                continue
            if c in self._OP_FIRST:
                if line[i:i+3] == '...':
                    found.append(('name', '...'))
                    i = i + 3
                elif i + 1 < l and line[i+1] in self._OP_SECOND:
                    found.append(('op', line[i:i+2]))
                    i = i + 2
                else:
                    found.append(('op', c))
                    i = i + 1
                continue
            # anything else starts a name running up to the next blank,
            # separator or operator character
            s = i
            while i < l and line[i] not in self._NAME_STOP:
                i = i + 1
            found.append(('name', line[s:i]))
        return found

    def token(self):
        """Return the next token, or None at the end of the input.

        None is also returned when the input ends inside a string or a
        comment.
        """
        while not self.tokens:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # preprocessor lines are only split on blanks
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                tok = self._read_string(line)
                if tok is not None:
                    self.last = tok
                return tok
            if line[0:2] == '/*':
                tok = self._read_block_comment(line[2:])
                if tok is not None:
                    self.last = tok
                return tok
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self.tokens.extend(self._tokenize(self._split_code(line)))

        tok = self.tokens.pop(0)
        self.last = tok
        return tok
575
576class CParser:
577    """The C module parser"""
578    def __init__(self, filename, idx = None):
579        self.filename = filename
580        if len(filename) > 2 and filename[-2:] == '.h':
581            self.is_header = 1
582        else:
583            self.is_header = 0
584        self.input = open(filename)
585        self.lexer = CLexer(self.input)
586        if idx == None:
587            self.index = index()
588        else:
589            self.index = idx
590        self.top_comment = ""
591        self.last_comment = ""
592        self.comment = None
593        self.collect_ref = 0
594        self.no_error = 0
595        self.conditionals = []
596        self.defines = []
597
598    def collect_references(self):
599        self.collect_ref = 1
600
601    def stop_error(self):
602        self.no_error = 1
603
604    def start_error(self):
605        self.no_error = 0
606
607    def lineno(self):
608        return self.lexer.getlineno()
609
610    def index_add(self, name, module, static, type, info=None, extra = None):
611        if self.is_header == 1:
612            self.index.add(name, module, module, static, type, self.lineno(),
613                           info, extra, self.conditionals)
614        else:
615            self.index.add(name, None, module, static, type, self.lineno(),
616                           info, extra, self.conditionals)
617
618    def index_add_ref(self, name, module, static, type, info=None,
619                      extra = None):
620        if self.is_header == 1:
621            self.index.add_ref(name, module, module, static, type,
622                               self.lineno(), info, extra, self.conditionals)
623        else:
624            self.index.add_ref(name, None, module, static, type, self.lineno(),
625                               info, extra, self.conditionals)
626
627    def warning(self, msg):
628        if self.no_error:
629            return
630        print(msg)
631
632    def error(self, msg, token=-1):
633        if self.no_error:
634            return
635
636        print("Parse Error: " + msg)
637        if token != -1:
638            print("Got token ", token)
639        self.lexer.debug()
640        sys.exit(1)
641
642    def debug(self, msg, token=-1):
643        print("Debug: " + msg)
644        if token != -1:
645            print("Got token ", token)
646        self.lexer.debug()
647
648    def parseTopComment(self, comment):
649        res = {}
650        lines = comment.split("\n")
651        item = None
652        for line in lines:
653            while line != "" and (line[0] == ' ' or line[0] == '\t'):
654                line = line[1:]
655            while line != "" and line[0] == '*':
656                line = line[1:]
657            while line != "" and (line[0] == ' ' or line[0] == '\t'):
658                line = line[1:]
659            try:
660                (it, line) = line.split(":", 1)
661                item = it
662                while line != "" and (line[0] == ' ' or line[0] == '\t'):
663                    line = line[1:]
664                if item in res:
665                    res[item] = res[item] + " " + line
666                else:
667                    res[item] = line
668            except:
669                if item != None:
670                    if item in res:
671                        res[item] = res[item] + " " + line
672                    else:
673                        res[item] = line
674        self.index.info = res
675
676    def parseComment(self, token):
677        if self.top_comment == "":
678            self.top_comment = token[1]
679        if self.comment == None or token[1][0] == '*':
680            self.comment = token[1];
681        else:
682            self.comment = self.comment + token[1]
683        token = self.lexer.token()
684
685        if self.comment.find("DOC_DISABLE") != -1:
686            self.stop_error()
687
688        if self.comment.find("DOC_ENABLE") != -1:
689            self.start_error()
690
691        return token
692
    #
    # Parse a comment block associated with a typedef
    #
696    def parseTypeComment(self, name, quiet = 0):
697        if name[0:2] == '__':
698            quiet = 1
699
700        args = []
701        desc = ""
702
703        if self.comment == None:
704            if not quiet:
705                self.warning("Missing comment for type %s" % (name))
706            return((args, desc))
707        if self.comment[0] != '*':
708            if not quiet:
709                self.warning("Missing * in type comment for %s" % (name))
710            return((args, desc))
711        lines = self.comment.split('\n')
712        if lines[0] == '*':
713            del lines[0]
714        if lines[0] != "* %s:" % (name):
715            if not quiet:
716                self.warning("Misformatted type comment for %s" % (name))
717                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
718            return((args, desc))
719        del lines[0]
720        while len(lines) > 0 and lines[0] == '*':
721            del lines[0]
722        desc = ""
723        while len(lines) > 0:
724            l = lines[0]
725            while len(l) > 0 and l[0] == '*':
726                l = l[1:]
727            l = l.strip()
728            desc = desc + " " + l
729            del lines[0]
730
731        desc = desc.strip()
732
733        if quiet == 0:
734            if desc == "":
735                self.warning("Type comment for %s lack description of the macro" % (name))
736
737        return(desc)
    #
    # Parse a comment block associated with a macro
    #
741    def parseMacroComment(self, name, quiet = 0):
742        if name[0:2] == '__':
743            quiet = 1
744
745        args = []
746        desc = ""
747
748        if self.comment == None:
749            if not quiet:
750                self.warning("Missing comment for macro %s" % (name))
751            return((args, desc))
752        if self.comment[0] != '*':
753            if not quiet:
754                self.warning("Missing * in macro comment for %s" % (name))
755            return((args, desc))
756        lines = self.comment.split('\n')
757        if lines[0] == '*':
758            del lines[0]
759        if lines[0] != "* %s:" % (name):
760            if not quiet:
761                self.warning("Misformatted macro comment for %s" % (name))
762                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
763            return((args, desc))
764        del lines[0]
765        while lines[0] == '*':
766            del lines[0]
767        while len(lines) > 0 and lines[0][0:3] == '* @':
768            l = lines[0][3:]
769            try:
770                (arg, desc) = l.split(':', 1)
771                desc=desc.strip()
772                arg=arg.strip()
773            except:
774                if not quiet:
775                    self.warning("Misformatted macro comment for %s" % (name))
776                    self.warning("  problem with '%s'" % (lines[0]))
777                del lines[0]
778                continue
779            del lines[0]
780            l = lines[0].strip()
781            while len(l) > 2 and l[0:3] != '* @':
782                while l[0] == '*':
783                    l = l[1:]
784                desc = desc + ' ' + l.strip()
785                del lines[0]
786                if len(lines) == 0:
787                    break
788                l = lines[0]
789            args.append((arg, desc))
790        while len(lines) > 0 and lines[0] == '*':
791            del lines[0]
792        desc = ""
793        while len(lines) > 0:
794            l = lines[0]
795            while len(l) > 0 and l[0] == '*':
796                l = l[1:]
797            l = l.strip()
798            desc = desc + " " + l
799            del lines[0]
800
801        desc = desc.strip()
802
803        if quiet == 0:
804            if desc == "":
805                self.warning("Macro comment for %s lack description of the macro" % (name))
806
807        return((args, desc))
808
809     #
810     # Parse a comment block and merge the informations found in the
811     # parameters descriptions, finally returns a block as complete
812     # as possible
813     #
814    def mergeFunctionComment(self, name, description, quiet = 0):
815        if name == 'main':
816            quiet = 1
817        if name[0:2] == '__':
818            quiet = 1
819
820        (ret, args) = description
821        desc = ""
822        retdesc = ""
823
824        if self.comment == None:
825            if not quiet:
826                self.warning("Missing comment for function %s" % (name))
827            return(((ret[0], retdesc), args, desc))
828        if self.comment[0] != '*':
829            if not quiet:
830                self.warning("Missing * in function comment for %s" % (name))
831            return(((ret[0], retdesc), args, desc))
832        lines = self.comment.split('\n')
833        if lines[0] == '*':
834            del lines[0]
835        if lines[0] != "* %s:" % (name):
836            if not quiet:
837                self.warning("Misformatted function comment for %s" % (name))
838                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
839            return(((ret[0], retdesc), args, desc))
840        del lines[0]
841        while lines[0] == '*':
842            del lines[0]
843        nbargs = len(args)
844        while len(lines) > 0 and lines[0][0:3] == '* @':
845            l = lines[0][3:]
846            try:
847                (arg, desc) = l.split(':', 1)
848                desc=desc.strip()
849                arg=arg.strip()
850            except:
851                if not quiet:
852                    self.warning("Misformatted function comment for %s" % (name))
853                    self.warning("  problem with '%s'" % (lines[0]))
854                del lines[0]
855                continue
856            del lines[0]
857            l = lines[0].strip()
858            while len(l) > 2 and l[0:3] != '* @':
859                while l[0] == '*':
860                    l = l[1:]
861                desc = desc + ' ' + l.strip()
862                del lines[0]
863                if len(lines) == 0:
864                    break
865                l = lines[0]
866            i = 0
867            while i < nbargs:
868                if args[i][1] == arg:
869                    args[i] = (args[i][0], arg, desc)
870                    break;
871                i = i + 1
872            if i >= nbargs:
873                if not quiet:
874                    self.warning("Unable to find arg %s from function comment for %s" % (
875                       arg, name))
876        while len(lines) > 0 and lines[0] == '*':
877            del lines[0]
878        desc = ""
879        while len(lines) > 0:
880            l = lines[0]
881            while len(l) > 0 and l[0] == '*':
882                l = l[1:]
883            l = l.strip()
884            if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
885                try:
886                    l = l.split(' ', 1)[1]
887                except:
888                    l = ""
889                retdesc = l.strip()
890                del lines[0]
891                while len(lines) > 0:
892                    l = lines[0]
893                    while len(l) > 0 and l[0] == '*':
894                        l = l[1:]
895                    l = l.strip()
896                    retdesc = retdesc + " " + l
897                    del lines[0]
898            else:
899                desc = desc + " " + l
900                del lines[0]
901
902        retdesc = retdesc.strip()
903        desc = desc.strip()
904
905        if quiet == 0:
906             #
907             # report missing comments
908             #
909            i = 0
910            while i < nbargs:
911                if args[i][2] == None and args[i][0] != "void" and \
912                   ((args[i][1] != None) or (args[i][1] == '')):
913                    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
914                i = i + 1
915            if retdesc == "" and ret[0] != "void":
916                self.warning("Function comment for %s lacks description of return value" % (name))
917            if desc == "":
918                self.warning("Function comment for %s lacks description of the function" % (name))
919
920        return(((ret[0], retdesc), args, desc))
921
922    def parsePreproc(self, token):
923        if debug:
924            print("=> preproc ", token, self.lexer.tokens)
925        name = token[1]
926        if name == "#include":
927            token = self.lexer.token()
928            if token == None:
929                return None
930            if token[0] == 'preproc':
931                self.index_add(token[1], self.filename, not self.is_header,
932                                "include")
933                return self.lexer.token()
934            return token
935        if name == "#define":
936            token = self.lexer.token()
937            if token == None:
938                return None
939            if token[0] == 'preproc':
940                 # TODO macros with arguments
941                name = token[1]
942                lst = []
943                token = self.lexer.token()
944                while token != None and token[0] == 'preproc' and \
945                      token[1][0] != '#':
946                    lst.append(token[1])
947                    token = self.lexer.token()
948                try:
949                    name = name.split('(') [0]
950                except:
951                    pass
952                info = self.parseMacroComment(name, not self.is_header)
953                self.index_add(name, self.filename, not self.is_header,
954                                "macro", info)
955                return token
956
957        #
958        # Processing of conditionals modified by Bill 1/1/05
959        #
960        # We process conditionals (i.e. tokens from #ifdef, #ifndef,
961        # #if, #else and #endif) for headers and mainline code,
962        # store the ones from the header in libxml2-api.xml, and later
963        # (in the routine merge_public) verify that the two (header and
964        # mainline code) agree.
965        #
966        # There is a small problem with processing the headers. Some of
967        # the variables are not concerned with enabling / disabling of
968        # library functions (e.g. '__XML_PARSER_H__'), and we don't want
969        # them to be included in libxml2-api.xml, or involved in
970        # the check between the header and the mainline code.  To
971        # accomplish this, we ignore any conditional which doesn't include
972        # the string 'ENABLED'
973        #
974        if name == "#ifdef":
975            apstr = self.lexer.tokens[0][1]
976            try:
977                self.defines.append(apstr)
978                if apstr.find('ENABLED') != -1:
979                    self.conditionals.append("defined(%s)" % apstr)
980            except:
981                pass
982        elif name == "#ifndef":
983            apstr = self.lexer.tokens[0][1]
984            try:
985                self.defines.append(apstr)
986                if apstr.find('ENABLED') != -1:
987                    self.conditionals.append("!defined(%s)" % apstr)
988            except:
989                pass
990        elif name == "#if":
991            apstr = ""
992            for tok in self.lexer.tokens:
993                if apstr != "":
994                    apstr = apstr + " "
995                apstr = apstr + tok[1]
996            try:
997                self.defines.append(apstr)
998                if apstr.find('ENABLED') != -1:
999                    self.conditionals.append(apstr)
1000            except:
1001                pass
1002        elif name == "#else":
1003            if self.conditionals != [] and \
1004               self.defines[-1].find('ENABLED') != -1:
1005                self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
1006        elif name == "#endif":
1007            if self.conditionals != [] and \
1008               self.defines[-1].find('ENABLED') != -1:
1009                self.conditionals = self.conditionals[:-1]
1010            self.defines = self.defines[:-1]
1011        token = self.lexer.token()
1012        while token != None and token[0] == 'preproc' and \
1013            token[1][0] != '#':
1014            token = self.lexer.token()
1015        return token
1016
1017     #
1018     # token acquisition on top of the lexer, it handle internally
1019     # preprocessor and comments since they are logically not part of
1020     # the program structure.
1021     #
1022    def token(self):
1023        global ignored_words
1024
1025        token = self.lexer.token()
1026        while token != None:
1027            if token[0] == 'comment':
1028                token = self.parseComment(token)
1029                continue
1030            elif token[0] == 'preproc':
1031                token = self.parsePreproc(token)
1032                continue
1033            elif token[0] == "name" and token[1] == "__const":
1034                token = ("name", "const")
1035                return token
1036            elif token[0] == "name" and token[1] == "__attribute":
1037                token = self.lexer.token()
1038                while token != None and token[1] != ";":
1039                    token = self.lexer.token()
1040                return token
1041            elif token[0] == "name" and token[1] in ignored_words:
1042                (n, info) = ignored_words[token[1]]
1043                i = 0
1044                while i < n:
1045                    token = self.lexer.token()
1046                    i = i + 1
1047                token = self.lexer.token()
1048                continue
1049            else:
1050                if debug:
1051                    print("=> ", token)
1052                return token
1053        return None
1054
1055     #
1056     # Parse a typedef, it records the type and its name.
1057     #
1058    def parseTypedef(self, token):
1059        if token == None:
1060            return None
1061        token = self.parseType(token)
1062        if token == None:
1063            self.error("parsing typedef")
1064            return None
1065        base_type = self.type
1066        type = base_type
1067         #self.debug("end typedef type", token)
1068        while token != None:
1069            if token[0] == "name":
1070                name = token[1]
1071                signature = self.signature
1072                if signature != None:
1073                    type = type.split('(')[0]
1074                    d = self.mergeFunctionComment(name,
1075                            ((type, None), signature), 1)
1076                    self.index_add(name, self.filename, not self.is_header,
1077                                    "functype", d)
1078                else:
1079                    if base_type == "struct":
1080                        self.index_add(name, self.filename, not self.is_header,
1081                                        "struct", type)
1082                        base_type = "struct " + name
1083                    else:
1084                        # TODO report missing or misformatted comments
1085                        info = self.parseTypeComment(name, 1)
1086                        self.index_add(name, self.filename, not self.is_header,
1087                                    "typedef", type, info)
1088                token = self.token()
1089            else:
1090                self.error("parsing typedef: expecting a name")
1091                return token
1092             #self.debug("end typedef", token)
1093            if token != None and token[0] == 'sep' and token[1] == ',':
1094                type = base_type
1095                token = self.token()
1096                while token != None and token[0] == "op":
1097                    type = type + token[1]
1098                    token = self.token()
1099            elif token != None and token[0] == 'sep' and token[1] == ';':
1100                break;
1101            elif token != None and token[0] == 'name':
1102                type = base_type
1103                continue;
1104            else:
1105                self.error("parsing typedef: expecting ';'", token)
1106                return token
1107        token = self.token()
1108        return token
1109
1110     #
1111     # Parse a C code block, used for functions it parse till
1112     # the balancing } included
1113     #
1114    def parseBlock(self, token):
1115        while token != None:
1116            if token[0] == "sep" and token[1] == "{":
1117                token = self.token()
1118                token = self.parseBlock(token)
1119            elif token[0] == "sep" and token[1] == "}":
1120                self.comment = None
1121                token = self.token()
1122                return token
1123            else:
1124                if self.collect_ref == 1:
1125                    oldtok = token
1126                    token = self.token()
1127                    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
1128                        if token[0] == "sep" and token[1] == "(":
1129                            self.index_add_ref(oldtok[1], self.filename,
1130                                                0, "function")
1131                            token = self.token()
1132                        elif token[0] == "name":
1133                            token = self.token()
1134                            if token[0] == "sep" and (token[1] == ";" or
1135                               token[1] == "," or token[1] == "="):
1136                                self.index_add_ref(oldtok[1], self.filename,
1137                                                    0, "type")
1138                    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
1139                        self.index_add_ref(oldtok[1], self.filename,
1140                                            0, "typedef")
1141                    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
1142                        self.index_add_ref(oldtok[1], self.filename,
1143                                            0, "typedef")
1144
1145                else:
1146                    token = self.token()
1147        return token
1148
1149     #
1150     # Parse a C struct definition till the balancing }
1151     #
1152    def parseStruct(self, token):
1153        fields = []
1154         #self.debug("start parseStruct", token)
1155        while token != None:
1156            if token[0] == "sep" and token[1] == "{":
1157                token = self.token()
1158                token = self.parseTypeBlock(token)
1159            elif token[0] == "sep" and token[1] == "}":
1160                self.struct_fields = fields
1161                 #self.debug("end parseStruct", token)
1162                 #print fields
1163                token = self.token()
1164                return token
1165            else:
1166                base_type = self.type
1167                 #self.debug("before parseType", token)
1168                token = self.parseType(token)
1169                 #self.debug("after parseType", token)
1170                if token != None and token[0] == "name":
1171                    fname = token[1]
1172                    token = self.token()
1173                    if token[0] == "sep" and token[1] == ";":
1174                        self.comment = None
1175                        token = self.token()
1176                        fields.append((self.type, fname, self.comment))
1177                        self.comment = None
1178                    else:
1179                        self.error("parseStruct: expecting ;", token)
1180                elif token != None and token[0] == "sep" and token[1] == "{":
1181                    token = self.token()
1182                    token = self.parseTypeBlock(token)
1183                    if token != None and token[0] == "name":
1184                        token = self.token()
1185                    if token != None and token[0] == "sep" and token[1] == ";":
1186                        token = self.token()
1187                    else:
1188                        self.error("parseStruct: expecting ;", token)
1189                else:
1190                    self.error("parseStruct: name", token)
1191                    token = self.token()
1192                self.type = base_type;
1193        self.struct_fields = fields
1194         #self.debug("end parseStruct", token)
1195         #print fields
1196        return token
1197
1198     #
1199     # Parse a C enum block, parse till the balancing }
1200     #
1201    def parseEnumBlock(self, token):
1202        self.enums = []
1203        name = None
1204        self.comment = None
1205        comment = ""
1206        value = "0"
1207        while token != None:
1208            if token[0] == "sep" and token[1] == "{":
1209                token = self.token()
1210                token = self.parseTypeBlock(token)
1211            elif token[0] == "sep" and token[1] == "}":
1212                if name != None:
1213                    if self.comment != None:
1214                        comment = self.comment
1215                        self.comment = None
1216                    self.enums.append((name, value, comment))
1217                token = self.token()
1218                return token
1219            elif token[0] == "name":
1220                    if name != None:
1221                        if self.comment != None:
1222                            comment = self.comment.strip()
1223                            self.comment = None
1224                        self.enums.append((name, value, comment))
1225                    name = token[1]
1226                    comment = ""
1227                    token = self.token()
1228                    if token[0] == "op" and token[1][0] == "=":
1229                        value = ""
1230                        if len(token[1]) > 1:
1231                            value = token[1][1:]
1232                        token = self.token()
1233                        while token[0] != "sep" or (token[1] != ',' and
1234                              token[1] != '}'):
1235                            value = value + token[1]
1236                            token = self.token()
1237                    else:
1238                        try:
1239                            value = "%d" % (int(value) + 1)
1240                        except:
1241                            self.warning("Failed to compute value of enum %s" % (name))
1242                            value=""
1243                    if token[0] == "sep" and token[1] == ",":
1244                        token = self.token()
1245            else:
1246                token = self.token()
1247        return token
1248
1249     #
1250     # Parse a C definition block, used for structs it parse till
1251     # the balancing }
1252     #
1253    def parseTypeBlock(self, token):
1254        while token != None:
1255            if token[0] == "sep" and token[1] == "{":
1256                token = self.token()
1257                token = self.parseTypeBlock(token)
1258            elif token[0] == "sep" and token[1] == "}":
1259                token = self.token()
1260                return token
1261            else:
1262                token = self.token()
1263        return token
1264
1265     #
1266     # Parse a type: the fact that the type name can either occur after
1267     #    the definition or within the definition makes it a little harder
1268     #    if inside, the name token is pushed back before returning
1269     #
1270    def parseType(self, token):
1271        self.type = ""
1272        self.struct_fields = []
1273        self.signature = None
1274        if token == None:
1275            return token
1276
1277        while token[0] == "name" and (
1278              token[1] == "const" or \
1279              token[1] == "unsigned" or \
1280              token[1] == "signed"):
1281            if self.type == "":
1282                self.type = token[1]
1283            else:
1284                self.type = self.type + " " + token[1]
1285            token = self.token()
1286
1287        if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1288            if self.type == "":
1289                self.type = token[1]
1290            else:
1291                self.type = self.type + " " + token[1]
1292            if token[0] == "name" and token[1] == "int":
1293                if self.type == "":
1294                    self.type = tmp[1]
1295                else:
1296                    self.type = self.type + " " + tmp[1]
1297
1298        elif token[0] == "name" and token[1] == "struct":
1299            if self.type == "":
1300                self.type = token[1]
1301            else:
1302                self.type = self.type + " " + token[1]
1303            token = self.token()
1304            nametok = None
1305            if token[0] == "name":
1306                nametok = token
1307                token = self.token()
1308            if token != None and token[0] == "sep" and token[1] == "{":
1309                token = self.token()
1310                token = self.parseStruct(token)
1311            elif token != None and token[0] == "op" and token[1] == "*":
1312                self.type = self.type + " " + nametok[1] + " *"
1313                token = self.token()
1314                while token != None and token[0] == "op" and token[1] == "*":
1315                    self.type = self.type + " *"
1316                    token = self.token()
1317                if token[0] == "name":
1318                    nametok = token
1319                    token = self.token()
1320                else:
1321                    self.error("struct : expecting name", token)
1322                    return token
1323            elif token != None and token[0] == "name" and nametok != None:
1324                self.type = self.type + " " + nametok[1]
1325                return token
1326
1327            if nametok != None:
1328                self.lexer.push(token)
1329                token = nametok
1330            return token
1331
1332        elif token[0] == "name" and token[1] == "enum":
1333            if self.type == "":
1334                self.type = token[1]
1335            else:
1336                self.type = self.type + " " + token[1]
1337            self.enums = []
1338            token = self.token()
1339            if token != None and token[0] == "sep" and token[1] == "{":
1340                token = self.token()
1341                token = self.parseEnumBlock(token)
1342            else:
1343                self.error("parsing enum: expecting '{'", token)
1344            enum_type = None
1345            if token != None and token[0] != "name":
1346                self.lexer.push(token)
1347                token = ("name", "enum")
1348            else:
1349                enum_type = token[1]
1350            for enum in self.enums:
1351                self.index_add(enum[0], self.filename,
1352                               not self.is_header, "enum",
1353                               (enum[1], enum[2], enum_type))
1354            return token
1355
1356        elif token[0] == "name":
1357            if self.type == "":
1358                self.type = token[1]
1359            else:
1360                self.type = self.type + " " + token[1]
1361        else:
1362            self.error("parsing type %s: expecting a name" % (self.type),
1363                       token)
1364            return token
1365        token = self.token()
1366        while token != None and (token[0] == "op" or
1367              token[0] == "name" and token[1] == "const"):
1368            self.type = self.type + " " + token[1]
1369            token = self.token()
1370
1371         #
1372         # if there is a parenthesis here, this means a function type
1373         #
1374        if token != None and token[0] == "sep" and token[1] == '(':
1375            self.type = self.type + token[1]
1376            token = self.token()
1377            while token != None and token[0] == "op" and token[1] == '*':
1378                self.type = self.type + token[1]
1379                token = self.token()
1380            if token == None or token[0] != "name" :
1381                self.error("parsing function type, name expected", token);
1382                return token
1383            self.type = self.type + token[1]
1384            nametok = token
1385            token = self.token()
1386            if token != None and token[0] == "sep" and token[1] == ')':
1387                self.type = self.type + token[1]
1388                token = self.token()
1389                if token != None and token[0] == "sep" and token[1] == '(':
1390                    token = self.token()
1391                    type = self.type;
1392                    token = self.parseSignature(token);
1393                    self.type = type;
1394                else:
1395                    self.error("parsing function type, '(' expected", token);
1396                    return token
1397            else:
1398                self.error("parsing function type, ')' expected", token);
1399                return token
1400            self.lexer.push(token)
1401            token = nametok
1402            return token
1403
1404         #
1405         # do some lookahead for arrays
1406         #
1407        if token != None and token[0] == "name":
1408            nametok = token
1409            token = self.token()
1410            if token != None and token[0] == "sep" and token[1] == '[':
1411                self.type = self.type + nametok[1]
1412                while token != None and token[0] == "sep" and token[1] == '[':
1413                    self.type = self.type + token[1]
1414                    token = self.token()
1415                    while token != None and token[0] != 'sep' and \
1416                          token[1] != ']' and token[1] != ';':
1417                        self.type = self.type + token[1]
1418                        token = self.token()
1419                if token != None and token[0] == 'sep' and token[1] == ']':
1420                    self.type = self.type + token[1]
1421                    token = self.token()
1422                else:
1423                    self.error("parsing array type, ']' expected", token);
1424                    return token
1425            elif token != None and token[0] == "sep" and token[1] == ':':
1426                 # remove :12 in case it's a limited int size
1427                token = self.token()
1428                token = self.token()
1429            self.lexer.push(token)
1430            token = nametok
1431
1432        return token
1433
1434     #
1435     # Parse a signature: '(' has been parsed and we scan the type definition
1436     #    up to the ')' included
1437    def parseSignature(self, token):
1438        signature = []
1439        if token != None and token[0] == "sep" and token[1] == ')':
1440            self.signature = []
1441            token = self.token()
1442            return token
1443        while token != None:
1444            token = self.parseType(token)
1445            if token != None and token[0] == "name":
1446                signature.append((self.type, token[1], None))
1447                token = self.token()
1448            elif token != None and token[0] == "sep" and token[1] == ',':
1449                token = self.token()
1450                continue
1451            elif token != None and token[0] == "sep" and token[1] == ')':
1452                 # only the type was provided
1453                if self.type == "...":
1454                    signature.append((self.type, "...", None))
1455                else:
1456                    signature.append((self.type, None, None))
1457            if token != None and token[0] == "sep":
1458                if token[1] == ',':
1459                    token = self.token()
1460                    continue
1461                elif token[1] == ')':
1462                    token = self.token()
1463                    break
1464        self.signature = signature
1465        return token
1466
1467     #
1468     # Parse a global definition, be it a type, variable or function
1469     # the extern "C" blocks are a bit nasty and require it to recurse.
1470     #
1471    def parseGlobal(self, token):
1472        static = 0
1473        if token[1] == 'extern':
1474            token = self.token()
1475            if token == None:
1476                return token
1477            if token[0] == 'string':
1478                if token[1] == 'C':
1479                    token = self.token()
1480                    if token == None:
1481                        return token
1482                    if token[0] == 'sep' and token[1] == "{":
1483                        token = self.token()
1484#                         print 'Entering extern "C line ', self.lineno()
1485                        while token != None and (token[0] != 'sep' or
1486                              token[1] != "}"):
1487                            if token[0] == 'name':
1488                                token = self.parseGlobal(token)
1489                            else:
1490                                self.error(
1491                                 "token %s %s unexpected at the top level" % (
1492                                        token[0], token[1]))
1493                                token = self.parseGlobal(token)
1494#                         print 'Exiting extern "C" line', self.lineno()
1495                        token = self.token()
1496                        return token
1497                else:
1498                    return token
1499        elif token[1] == 'static':
1500            static = 1
1501            token = self.token()
1502            if token == None or  token[0] != 'name':
1503                return token
1504
1505        if token[1] == 'typedef':
1506            token = self.token()
1507            return self.parseTypedef(token)
1508        else:
1509            token = self.parseType(token)
1510            type_orig = self.type
1511        if token == None or token[0] != "name":
1512            return token
1513        type = type_orig
1514        self.name = token[1]
1515        token = self.token()
1516        while token != None and (token[0] == "sep" or token[0] == "op"):
1517            if token[0] == "sep":
1518                if token[1] == "[":
1519                    type = type + token[1]
1520                    token = self.token()
1521                    while token != None and (token[0] != "sep" or \
1522                          token[1] != ";"):
1523                        type = type + token[1]
1524                        token = self.token()
1525
1526            if token != None and token[0] == "op" and token[1] == "=":
1527                 #
1528                 # Skip the initialization of the variable
1529                 #
1530                token = self.token()
1531                if token[0] == 'sep' and token[1] == '{':
1532                    token = self.token()
1533                    token = self.parseBlock(token)
1534                else:
1535                    self.comment = None
1536                    while token != None and (token[0] != "sep" or \
1537                          (token[1] != ';' and token[1] != ',')):
1538                            token = self.token()
1539                self.comment = None
1540                if token == None or token[0] != "sep" or (token[1] != ';' and
1541                   token[1] != ','):
1542                    self.error("missing ';' or ',' after value")
1543
1544            if token != None and token[0] == "sep":
1545                if token[1] == ";":
1546                    self.comment = None
1547                    token = self.token()
1548                    if type == "struct":
1549                        self.index_add(self.name, self.filename,
1550                             not self.is_header, "struct", self.struct_fields)
1551                    else:
1552                        self.index_add(self.name, self.filename,
1553                             not self.is_header, "variable", type)
1554                    break
1555                elif token[1] == "(":
1556                    token = self.token()
1557                    token = self.parseSignature(token)
1558                    if token == None:
1559                        return None
1560                    if token[0] == "sep" and token[1] == ";":
1561                        d = self.mergeFunctionComment(self.name,
1562                                ((type, None), self.signature), 1)
1563                        self.index_add(self.name, self.filename, static,
1564                                        "function", d)
1565                        token = self.token()
1566                    elif token[0] == "sep" and token[1] == "{":
1567                        d = self.mergeFunctionComment(self.name,
1568                                ((type, None), self.signature), static)
1569                        self.index_add(self.name, self.filename, static,
1570                                        "function", d)
1571                        token = self.token()
1572                        token = self.parseBlock(token);
1573                elif token[1] == ',':
1574                    self.comment = None
1575                    self.index_add(self.name, self.filename, static,
1576                                    "variable", type)
1577                    type = type_orig
1578                    token = self.token()
1579                    while token != None and token[0] == "sep":
1580                        type = type + token[1]
1581                        token = self.token()
1582                    if token != None and token[0] == "name":
1583                        self.name = token[1]
1584                        token = self.token()
1585                else:
1586                    break
1587
1588        return token
1589
1590    def parse(self):
1591        self.warning("Parsing %s" % (self.filename))
1592        token = self.token()
1593        while token != None:
1594            if token[0] == 'name':
1595                token = self.parseGlobal(token)
1596            else:
1597                self.error("token %s %s unexpected at the top level" % (
1598                       token[0], token[1]))
1599                token = self.parseGlobal(token)
1600                return
1601        self.parseTopComment(self.top_comment)
1602        return self.index
1603
1604
1605class docBuilder:
1606    """A documentation builder"""
1607    def __init__(self, name, directories=['.'], excludes=[]):
1608        self.name = name
1609        self.directories = directories
1610        self.excludes = excludes + list(ignored_files.keys())
1611        self.modules = {}
1612        self.headers = {}
1613        self.idx = index()
1614        self.xref = {}
1615        self.index = {}
1616        if name == 'libxml2':
1617            self.basename = 'libxml'
1618        else:
1619            self.basename = name
1620
1621    def indexString(self, id, str):
1622        if str == None:
1623            return
1624        str = str.replace("'", ' ')
1625        str = str.replace('"', ' ')
1626        str = str.replace("/", ' ')
1627        str = str.replace('*', ' ')
1628        str = str.replace("[", ' ')
1629        str = str.replace("]", ' ')
1630        str = str.replace("(", ' ')
1631        str = str.replace(")", ' ')
1632        str = str.replace("<", ' ')
1633        str = str.replace('>', ' ')
1634        str = str.replace("&", ' ')
1635        str = str.replace('#', ' ')
1636        str = str.replace(",", ' ')
1637        str = str.replace('.', ' ')
1638        str = str.replace(';', ' ')
1639        tokens = str.split()
1640        for token in tokens:
1641            try:
1642                c = token[0]
1643                if string.ascii_letters.find(c) < 0:
1644                    pass
1645                elif len(token) < 3:
1646                    pass
1647                else:
1648                    lower = token.lower()
1649                    # TODO: generalize this a bit
1650                    if lower == 'and' or lower == 'the':
1651                        pass
1652                    elif token in self.xref:
1653                        self.xref[token].append(id)
1654                    else:
1655                        self.xref[token] = [id]
1656            except:
1657                pass
1658
1659    def analyze(self):
1660        print("Project %s : %d headers, %d modules" % (self.name, len(list(self.headers.keys())), len(list(self.modules.keys()))))
1661        self.idx.analyze()
1662
1663    def scanHeaders(self):
1664        for header in list(self.headers.keys()):
1665            parser = CParser(header)
1666            idx = parser.parse()
1667            self.headers[header] = idx;
1668            self.idx.merge(idx)
1669
1670    def scanModules(self):
1671        for module in list(self.modules.keys()):
1672            parser = CParser(module)
1673            idx = parser.parse()
1674            # idx.analyze()
1675            self.modules[module] = idx
1676            self.idx.merge_public(idx)
1677
1678    def scan(self):
1679        for directory in self.directories:
1680            files = glob.glob(directory + "/*.c")
1681            for file in files:
1682                skip = 0
1683                for excl in self.excludes:
1684                    if file.find(excl) != -1:
1685                        print("Skipping %s" % file)
1686                        skip = 1
1687                        break
1688                if skip == 0:
1689                    self.modules[file] = None;
1690            files = glob.glob(directory + "/*.h")
1691            for file in files:
1692                skip = 0
1693                for excl in self.excludes:
1694                    if file.find(excl) != -1:
1695                        print("Skipping %s" % file)
1696                        skip = 1
1697                        break
1698                if skip == 0:
1699                    self.headers[file] = None;
1700        self.scanHeaders()
1701        self.scanModules()
1702
1703    def modulename_file(self, file):
1704        module = os.path.basename(file)
1705        if module[-2:] == '.h':
1706            module = module[:-2]
1707        elif module[-2:] == '.c':
1708            module = module[:-2]
1709        return module
1710
1711    def serialize_enum(self, output, name):
1712        id = self.idx.enums[name]
1713        output.write("    <enum name='%s' file='%s'" % (name,
1714                     self.modulename_file(id.header)))
1715        if id.info != None:
1716            info = id.info
1717            if info[0] != None and info[0] != '':
1718                try:
1719                    val = eval(info[0])
1720                except:
1721                    val = info[0]
1722                output.write(" value='%s'" % (val));
1723            if info[2] != None and info[2] != '':
1724                output.write(" type='%s'" % info[2]);
1725            if info[1] != None and info[1] != '':
1726                output.write(" info='%s'" % escape(info[1]));
1727        output.write("/>\n")
1728
1729    def serialize_macro(self, output, name):
1730        id = self.idx.macros[name]
1731        output.write("    <macro name='%s' file='%s'>\n" % (name,
1732                     self.modulename_file(id.header)))
1733        if id.info != None:
1734            try:
1735                (args, desc) = id.info
1736                if desc != None and desc != "":
1737                    output.write("      <info>%s</info>\n" % (escape(desc)))
1738                    self.indexString(name, desc)
1739                for arg in args:
1740                    (name, desc) = arg
1741                    if desc != None and desc != "":
1742                        output.write("      <arg name='%s' info='%s'/>\n" % (
1743                                     name, escape(desc)))
1744                        self.indexString(name, desc)
1745                    else:
1746                        output.write("      <arg name='%s'/>\n" % (name))
1747            except:
1748                pass
1749        output.write("    </macro>\n")
1750
1751    def serialize_typedef(self, output, name):
1752        id = self.idx.typedefs[name]
1753        if id.info[0:7] == 'struct ':
1754            output.write("    <struct name='%s' file='%s' type='%s'" % (
1755                     name, self.modulename_file(id.header), id.info))
1756            name = id.info[7:]
1757            if name in self.idx.structs and ( \
1758               type(self.idx.structs[name].info) == type(()) or
1759                type(self.idx.structs[name].info) == type([])):
1760                output.write(">\n");
1761                try:
1762                    for field in self.idx.structs[name].info:
1763                        desc = field[2]
1764                        self.indexString(name, desc)
1765                        if desc == None:
1766                            desc = ''
1767                        else:
1768                            desc = escape(desc)
1769                        output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
1770                except:
1771                    print("Failed to serialize struct %s" % (name))
1772                output.write("    </struct>\n")
1773            else:
1774                output.write("/>\n");
1775        else :
1776            output.write("    <typedef name='%s' file='%s' type='%s'" % (
1777                         name, self.modulename_file(id.header), id.info))
1778            try:
1779                desc = id.extra
1780                if desc != None and desc != "":
1781                    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
1782                    output.write("    </typedef>\n")
1783                else:
1784                    output.write("/>\n")
1785            except:
1786                output.write("/>\n")
1787
1788    def serialize_variable(self, output, name):
1789        id = self.idx.variables[name]
1790        if id.info != None:
1791            output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
1792                    name, self.modulename_file(id.header), id.info))
1793        else:
1794            output.write("    <variable name='%s' file='%s'/>\n" % (
1795                    name, self.modulename_file(id.header)))
1796
1797    def serialize_function(self, output, name):
1798        id = self.idx.functions[name]
1799        if name == debugsym:
1800            print("=>", id)
1801
1802        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (id.type,
1803                     name, self.modulename_file(id.header),
1804                     self.modulename_file(id.module)))
1805        #
1806        # Processing of conditionals modified by Bill 1/1/05
1807        #
1808        if id.conditionals != None:
1809            apstr = ""
1810            for cond in id.conditionals:
1811                if apstr != "":
1812                    apstr = apstr + " &amp;&amp; "
1813                apstr = apstr + cond
1814            output.write("      <cond>%s</cond>\n"% (apstr));
1815        try:
1816            (ret, params, desc) = id.info
1817            if (desc == None or desc == '') and \
1818               name[0:9] != "xmlThrDef" and name != "xmlDllMain":
1819                print("%s %s from %s has no description" % (id.type, name,
1820                       self.modulename_file(id.module)))
1821
1822            output.write("      <info>%s</info>\n" % (escape(desc)))
1823            self.indexString(name, desc)
1824            if ret[0] != None:
1825                if ret[0] == "void":
1826                    output.write("      <return type='void'/>\n")
1827                else:
1828                    output.write("      <return type='%s' info='%s'/>\n" % (
1829                             ret[0], escape(ret[1])))
1830                    self.indexString(name, ret[1])
1831            for param in params:
1832                if param[0] == 'void':
1833                    continue
1834                if param[2] == None:
1835                    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
1836                else:
1837                    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
1838                    self.indexString(name, param[2])
1839        except:
1840            print("Failed to save function %s info: " % name, repr(id.info))
1841        output.write("    </%s>\n" % (id.type))
1842
1843    def serialize_exports(self, output, file):
1844        module = self.modulename_file(file)
1845        output.write("    <file name='%s'>\n" % (module))
1846        dict = self.headers[file]
1847        if dict.info != None:
1848            for data in ('Summary', 'Description', 'Author'):
1849                try:
1850                    output.write("     <%s>%s</%s>\n" % (
1851                                 data.lower(),
1852                                 escape(dict.info[data]),
1853                                 data.lower()))
1854                except:
1855                    print("Header %s lacks a %s description" % (module, data))
1856            if 'Description' in dict.info:
1857                desc = dict.info['Description']
1858                if desc.find("DEPRECATED") != -1:
1859                    output.write("     <deprecated/>\n")
1860
1861        ids = list(dict.macros.keys())
1862        ids.sort()
1863        for id in uniq(ids):
1864            # Macros are sometime used to masquerade other types.
1865            if id in dict.functions:
1866                continue
1867            if id in dict.variables:
1868                continue
1869            if id in dict.typedefs:
1870                continue
1871            if id in dict.structs:
1872                continue
1873            if id in dict.enums:
1874                continue
1875            output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
1876        ids = list(dict.enums.keys())
1877        ids.sort()
1878        for id in uniq(ids):
1879            output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
1880        ids = list(dict.typedefs.keys())
1881        ids.sort()
1882        for id in uniq(ids):
1883            output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
1884        ids = list(dict.structs.keys())
1885        ids.sort()
1886        for id in uniq(ids):
1887            output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
1888        ids = list(dict.variables.keys())
1889        ids.sort()
1890        for id in uniq(ids):
1891            output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
1892        ids = list(dict.functions.keys())
1893        ids.sort()
1894        for id in uniq(ids):
1895            output.write("     <exports symbol='%s' type='function'/>\n" % (id))
1896        output.write("    </file>\n")
1897
1898    def serialize_xrefs_files(self, output):
1899        headers = list(self.headers.keys())
1900        headers.sort()
1901        for file in headers:
1902            module = self.modulename_file(file)
1903            output.write("    <file name='%s'>\n" % (module))
1904            dict = self.headers[file]
1905            ids = uniq(list(dict.functions.keys()) + list(dict.variables.keys()) + \
1906                  list(dict.macros.keys()) + list(dict.typedefs.keys()) + \
1907                  list(dict.structs.keys()) + list(dict.enums.keys()))
1908            ids.sort()
1909            for id in ids:
1910                output.write("      <ref name='%s'/>\n" % (id))
1911            output.write("    </file>\n")
1912        pass
1913
1914    def serialize_xrefs_functions(self, output):
1915        funcs = {}
1916        for name in list(self.idx.functions.keys()):
1917            id = self.idx.functions[name]
1918            try:
1919                (ret, params, desc) = id.info
1920                for param in params:
1921                    if param[0] == 'void':
1922                        continue
1923                    if param[0] in funcs:
1924                        funcs[param[0]].append(name)
1925                    else:
1926                        funcs[param[0]] = [name]
1927            except:
1928                pass
1929        typ = list(funcs.keys())
1930        typ.sort()
1931        for type in typ:
1932            if type == '' or type == 'void' or type == "int" or \
1933               type == "char *" or type == "const char *" :
1934                continue
1935            output.write("    <type name='%s'>\n" % (type))
1936            ids = funcs[type]
1937            ids.sort()
1938            pid = ''        # not sure why we have dups, but get rid of them!
1939            for id in ids:
1940                if id != pid:
1941                    output.write("      <ref name='%s'/>\n" % (id))
1942                    pid = id
1943            output.write("    </type>\n")
1944
1945    def serialize_xrefs_constructors(self, output):
1946        funcs = {}
1947        for name in list(self.idx.functions.keys()):
1948            id = self.idx.functions[name]
1949            try:
1950                (ret, params, desc) = id.info
1951                if ret[0] == "void":
1952                    continue
1953                if ret[0] in funcs:
1954                    funcs[ret[0]].append(name)
1955                else:
1956                    funcs[ret[0]] = [name]
1957            except:
1958                pass
1959        typ = list(funcs.keys())
1960        typ.sort()
1961        for type in typ:
1962            if type == '' or type == 'void' or type == "int" or \
1963               type == "char *" or type == "const char *" :
1964                continue
1965            output.write("    <type name='%s'>\n" % (type))
1966            ids = funcs[type]
1967            ids.sort()
1968            for id in ids:
1969                output.write("      <ref name='%s'/>\n" % (id))
1970            output.write("    </type>\n")
1971
1972    def serialize_xrefs_alpha(self, output):
1973        letter = None
1974        ids = list(self.idx.identifiers.keys())
1975        ids.sort()
1976        for id in ids:
1977            if id[0] != letter:
1978                if letter != None:
1979                    output.write("    </letter>\n")
1980                letter = id[0]
1981                output.write("    <letter name='%s'>\n" % (letter))
1982            output.write("      <ref name='%s'/>\n" % (id))
1983        if letter != None:
1984            output.write("    </letter>\n")
1985
1986    def serialize_xrefs_references(self, output):
1987        typ = list(self.idx.identifiers.keys())
1988        typ.sort()
1989        for id in typ:
1990            idf = self.idx.identifiers[id]
1991            module = idf.header
1992            output.write("    <reference name='%s' href='%s'/>\n" % (id,
1993                         'html/' + self.basename + '-' +
1994                         self.modulename_file(module) + '.html#' +
1995                         id))
1996
1997    def serialize_xrefs_index(self, output):
1998        index = self.xref
1999        typ = list(index.keys())
2000        typ.sort()
2001        letter = None
2002        count = 0
2003        chunk = 0
2004        chunks = []
2005        for id in typ:
2006            if len(index[id]) > 30:
2007                continue
2008            if id[0] != letter:
2009                if letter == None or count > 200:
2010                    if letter != None:
2011                        output.write("      </letter>\n")
2012                        output.write("    </chunk>\n")
2013                        count = 0
2014                        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
2015                    output.write("    <chunk name='chunk%s'>\n" % (chunk))
2016                    first_letter = id[0]
2017                    chunk = chunk + 1
2018                elif letter != None:
2019                    output.write("      </letter>\n")
2020                letter = id[0]
2021                output.write("      <letter name='%s'>\n" % (letter))
2022            output.write("        <word name='%s'>\n" % (id))
2023            tokens = index[id];
2024            tokens.sort()
2025            tok = None
2026            for token in tokens:
2027                if tok == token:
2028                    continue
2029                tok = token
2030                output.write("          <ref name='%s'/>\n" % (token))
2031                count = count + 1
2032            output.write("        </word>\n")
2033        if letter != None:
2034            output.write("      </letter>\n")
2035            output.write("    </chunk>\n")
2036            if count != 0:
2037                chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
2038            output.write("    <chunks>\n")
2039            for ch in chunks:
2040                output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
2041                             ch[0], ch[1], ch[2]))
2042            output.write("    </chunks>\n")
2043
2044    def serialize_xrefs(self, output):
2045        output.write("  <references>\n")
2046        self.serialize_xrefs_references(output)
2047        output.write("  </references>\n")
2048        output.write("  <alpha>\n")
2049        self.serialize_xrefs_alpha(output)
2050        output.write("  </alpha>\n")
2051        output.write("  <constructors>\n")
2052        self.serialize_xrefs_constructors(output)
2053        output.write("  </constructors>\n")
2054        output.write("  <functions>\n")
2055        self.serialize_xrefs_functions(output)
2056        output.write("  </functions>\n")
2057        output.write("  <files>\n")
2058        self.serialize_xrefs_files(output)
2059        output.write("  </files>\n")
2060        output.write("  <index>\n")
2061        self.serialize_xrefs_index(output)
2062        output.write("  </index>\n")
2063
2064    def serialize(self):
2065        filename = "%s-api.xml" % self.name
2066        print("Saving XML description %s" % (filename))
2067        output = open(filename, "w")
2068        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
2069        output.write("<api name='%s'>\n" % self.name)
2070        output.write("  <files>\n")
2071        headers = list(self.headers.keys())
2072        headers.sort()
2073        for file in headers:
2074            self.serialize_exports(output, file)
2075        output.write("  </files>\n")
2076        output.write("  <symbols>\n")
2077        macros = list(self.idx.macros.keys())
2078        macros.sort()
2079        for macro in macros:
2080            self.serialize_macro(output, macro)
2081        enums = list(self.idx.enums.keys())
2082        enums.sort()
2083        for enum in enums:
2084            self.serialize_enum(output, enum)
2085        typedefs = list(self.idx.typedefs.keys())
2086        typedefs.sort()
2087        for typedef in typedefs:
2088            self.serialize_typedef(output, typedef)
2089        variables = list(self.idx.variables.keys())
2090        variables.sort()
2091        for variable in variables:
2092            self.serialize_variable(output, variable)
2093        functions = list(self.idx.functions.keys())
2094        functions.sort()
2095        for function in functions:
2096            self.serialize_function(output, function)
2097        output.write("  </symbols>\n")
2098        output.write("</api>\n")
2099        output.close()
2100
2101        filename = "%s-refs.xml" % self.name
2102        print("Saving XML Cross References %s" % (filename))
2103        output = open(filename, "w")
2104        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
2105        output.write("<apirefs name='%s'>\n" % self.name)
2106        self.serialize_xrefs(output)
2107        output.write("</apirefs>\n")
2108        output.close()
2109
2110
def rebuild():
    """Regenerate the XML description of the project found around the cwd.

    The project is guessed from the first of these files that exists:
    'parser.c' or '../parser.c' (libxml2), '../libxslt/transform.c'
    (libxslt).  Its sources are scanned, analyzed and serialized; when
    '../libexslt/exslt.c' exists too, libexslt is processed the same way
    afterwards.  Returns the builder of the main project, or None, after
    printing a message, when no project could be identified.
    """
    candidates = (
        ("parser.c", "Rebuilding API description for libxml2",
         "libxml2", [".", "."],
         ["xmlwin32version.h", "tst.c"]),
        ("../parser.c", "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c", "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )
    for probe, banner, project, directories, excludes in candidates:
        if glob.glob(probe) != []:
            print(banner)
            builder = docBuilder(project, directories, excludes)
            break
    else:
        print("rebuild() failed, unable to guess the module")
        return None

    for step in (builder.scan, builder.analyze, builder.serialize):
        step()
    if glob.glob("../libexslt/exslt.c") != []:
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        for step in (extra.scan, extra.analyze, extra.serialize):
            step()
    return builder
2137
2138#
2139# for debugging the parser
2140#
def parse(filename):
    """Parse the single C file `filename` and return what CParser.parse() gives."""
    return CParser(filename).parse()
2145
if __name__ == "__main__":
    # Without argument the whole API description is rebuilt; with a file
    # name, only that file is parsed after setting the module's debug flag.
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])
2152