1#!/usr/bin/python -u
2#
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
10import os, sys
11import string
12import glob
13
# When non-zero the parser prints every preprocessor directive it
# handles and every token it hands back to its caller.
debug=0
# Name of one symbol to trace: a line is printed each time an identifier
# with this name is defined, updated or registered in an index.
# None disables the tracing.
#debugsym='ignorableWhitespaceSAXFunc'
debugsym=None
17
18#
19# C parser analysis code
20#
# File names mapped to a short human readable reason explaining why the
# file is left out.  NOTE(review): the code consulting this table is not
# part of this section of the file - presumably the module scanner skips
# any file listed here; confirm against the caller.
ignored_files = {
  "trio": "too many non standard macros",
  "trio.c": "too many non standard macros",
  "trionan.c": "too many non standard macros",
  "triostr.c": "too many non standard macros",
  "acconfig.h": "generated portability layer",
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "testOOM.c": "out of memory tester",
  "testOOMlib.h": "out of memory tester",
  "testOOMlib.c": "out of memory tester",
  "rngparser.c": "not yet integrated",
  "rngparser.h": "not yet integrated",
  "elfgcchack.h": "not a normal header",
  "testHTML.c": "test tool",
  "testReader.c": "test tool",
  "testSchemas.c": "test tool",
  "testXPath.c": "test tool",
  "testAutomata.c": "test tool",
  "testModule.c": "test tool",
  "testRegexp.c": "test tool",
  "testThreads.c": "test tool",
  "testC14N.c": "test tool",
  "testRelax.c": "test tool",
  "testThreadsWin32.c": "test tool",
  "testSAX.c": "test tool",
  "testURI.c": "test tool",
  "testapi.c": "generated regression tests",
  "runtest.c": "regression tests program",
  "runsuite.c": "regression tests program",
  "tst.c": "not part of the library",
  "test.c": "not part of the library",
  "testdso.c": "test for dynamid shared libraries",
  "testrecurse.c": "test for entities recursions",
}
56
# Words that CParser.token() removes from the token stream.  Each word
# maps to a tuple (n, description): when the word is met as a 'name'
# token, the word itself and the n lexer tokens following it are
# discarded before parsing resumes.  The description is documentation
# only, token() does not use it.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XMLCALL": (0, "Special macro for win32 calls"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "XMLCDECL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "__stdcall": (0, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
}
76
def escape(raw):
    """Return raw with the XML special characters turned into entities.

    The characters & < > ' and " are replaced by &amp; &lt; &gt; &apos;
    and &quot; respectively so that the result can be embedded in XML
    text or attribute values.
    """
    # The ampersand has to be handled first: the other replacements
    # introduce new '&' characters which must not be escaped again.
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
84
def uniq(items):
    """Return the distinct elements of items.

    The elements are used as dictionary keys, so they must be hashable;
    the result is the key collection of that dictionary.
    """
    seen = dict.fromkeys(items, 1)
    return seen.keys()
90
class identifier:
    """Description of one symbol collected while scanning the sources.

    A symbol has a name, the header and the module it was found in, a
    kind (type), a line number, two free form payloads (info and
    extra), a static flag and an optional list of preprocessor
    conditions.
    """
    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        """Return a one line summary listing only the fields that are set."""
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module,)
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r


    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        """Store a private copy of conditionals, or None when it is empty."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            # copy so that later changes made by the caller to its own
            # list do not alter this identifier
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # this used to return self.module
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Complete this identifier with newly found information.

        header and type are only stored when not known yet.  module is
        stored when no module is known yet or when the module currently
        recorded is the same as the header.  info, extra and
        conditionals replace the stored value whenever they are not
        None.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # this used to store module in the header slot
            self.set_header(header)
        if module is not None and (self.module is None or
                                   self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
181
182class index:
183    def __init__(self, name = "noname"):
184        self.name = name
185        self.identifiers = {}
186        self.functions = {}
187	self.variables = {}
188	self.includes = {}
189	self.structs = {}
190	self.enums = {}
191	self.typedefs = {}
192	self.macros = {}
193	self.references = {}
194	self.info = {}
195
196    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
197        if name[0:2] == '__':
198	    return None
199        d = None
200        try:
201	   d = self.identifiers[name]
202	   d.update(header, module, type, lineno, info, extra, conditionals)
203	except:
204	   d = identifier(name, header, module, type, lineno, info, extra, conditionals)
205	   self.identifiers[name] = d
206
207	if d != None and static == 1:
208	    d.set_static(1)
209
210	if d != None and name != None and type != None:
211	    self.references[name] = d
212
213	if name == debugsym:
214	    print "New ref: %s" % (d)
215
216	return d
217
218    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
219        if name[0:2] == '__':
220	    return None
221        d = None
222        try:
223	   d = self.identifiers[name]
224	   d.update(header, module, type, lineno, info, extra, conditionals)
225	except:
226	   d = identifier(name, header, module, type, lineno, info, extra, conditionals)
227	   self.identifiers[name] = d
228
229	if d != None and static == 1:
230	    d.set_static(1)
231
232	if d != None and name != None and type != None:
233	    if type == "function":
234	        self.functions[name] = d
235	    elif type == "functype":
236	        self.functions[name] = d
237	    elif type == "variable":
238	        self.variables[name] = d
239	    elif type == "include":
240	        self.includes[name] = d
241	    elif type == "struct":
242	        self.structs[name] = d
243	    elif type == "enum":
244	        self.enums[name] = d
245	    elif type == "typedef":
246	        self.typedefs[name] = d
247	    elif type == "macro":
248	        self.macros[name] = d
249	    else:
250	        print "Unable to register type ", type
251
252	if name == debugsym:
253	    print "New symbol: %s" % (d)
254
255	return d
256
257    def merge(self, idx):
258        for id in idx.functions.keys():
259              #
260              # macro might be used to override functions or variables
261              # definitions
262              #
263	     if self.macros.has_key(id):
264	         del self.macros[id]
265	     if self.functions.has_key(id):
266	         print "function %s from %s redeclared in %s" % (
267		    id, self.functions[id].header, idx.functions[id].header)
268	     else:
269	         self.functions[id] = idx.functions[id]
270		 self.identifiers[id] = idx.functions[id]
271        for id in idx.variables.keys():
272              #
273              # macro might be used to override functions or variables
274              # definitions
275              #
276	     if self.macros.has_key(id):
277	         del self.macros[id]
278	     if self.variables.has_key(id):
279	         print "variable %s from %s redeclared in %s" % (
280		    id, self.variables[id].header, idx.variables[id].header)
281	     else:
282	         self.variables[id] = idx.variables[id]
283		 self.identifiers[id] = idx.variables[id]
284        for id in idx.structs.keys():
285	     if self.structs.has_key(id):
286	         print "struct %s from %s redeclared in %s" % (
287		    id, self.structs[id].header, idx.structs[id].header)
288	     else:
289	         self.structs[id] = idx.structs[id]
290		 self.identifiers[id] = idx.structs[id]
291        for id in idx.typedefs.keys():
292	     if self.typedefs.has_key(id):
293	         print "typedef %s from %s redeclared in %s" % (
294		    id, self.typedefs[id].header, idx.typedefs[id].header)
295	     else:
296	         self.typedefs[id] = idx.typedefs[id]
297		 self.identifiers[id] = idx.typedefs[id]
298        for id in idx.macros.keys():
299              #
300              # macro might be used to override functions or variables
301              # definitions
302              #
303             if self.variables.has_key(id):
304                 continue
305             if self.functions.has_key(id):
306                 continue
307             if self.enums.has_key(id):
308                 continue
309	     if self.macros.has_key(id):
310	         print "macro %s from %s redeclared in %s" % (
311		    id, self.macros[id].header, idx.macros[id].header)
312	     else:
313	         self.macros[id] = idx.macros[id]
314		 self.identifiers[id] = idx.macros[id]
315        for id in idx.enums.keys():
316	     if self.enums.has_key(id):
317	         print "enum %s from %s redeclared in %s" % (
318		    id, self.enums[id].header, idx.enums[id].header)
319	     else:
320	         self.enums[id] = idx.enums[id]
321		 self.identifiers[id] = idx.enums[id]
322
323    def merge_public(self, idx):
324        for id in idx.functions.keys():
325	     if self.functions.has_key(id):
326	         # check that function condition agrees with header
327	         if idx.functions[id].conditionals != \
328		    self.functions[id].conditionals:
329		     print "Header condition differs from Function for %s:" \
330		        % id
331		     print "  H: %s" % self.functions[id].conditionals
332		     print "  C: %s" % idx.functions[id].conditionals
333	         up = idx.functions[id]
334	         self.functions[id].update(None, up.module, up.type, up.info, up.extra)
335	 #     else:
336	 #         print "Function %s from %s is not declared in headers" % (
337	 #	        id, idx.functions[id].module)
338	 # TODO: do the same for variables.
339
340    def analyze_dict(self, type, dict):
341        count = 0
342	public = 0
343        for name in dict.keys():
344	    id = dict[name]
345	    count = count + 1
346	    if id.static == 0:
347	        public = public + 1
348        if count != public:
349	    print "  %d %s , %d public" % (count, type, public)
350	elif count != 0:
351	    print "  %d public %s" % (count, type)
352
353
354    def analyze(self):
355	self.analyze_dict("functions", self.functions)
356	self.analyze_dict("variables", self.variables)
357	self.analyze_dict("structs", self.structs)
358	self.analyze_dict("typedefs", self.typedefs)
359	self.analyze_dict("macros", self.macros)
360
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

    Tokens are (kind, text) tuples, kind being one of 'preproc',
    'string', 'comment', 'name', 'sep' or 'op'.  token() returns None
    once the input is exhausted.
    """
    # characters ending a 'name' token
    _NAME_STOPS = " \t(){}:;,+-*/%&!|[]=><"
    # characters each forming a 'sep' token
    _SEPARATORS = "(){}:;,[]"
    # characters starting an 'op' token; when the next character is one
    # of _OPERATOR_TAILS both are glued into a two character operator
    _OPERATORS = "+-*><=/%&!|."
    _OPERATOR_TAILS = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # last token returned, initialised so that debug() can be called
        # before the first token has been read
        self.last = None

    def getline(self):
        """Return the next non blank line, stripped, or None at the end.

        Lines ending with a backslash are joined with the lines that
        follow them.
        """
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put token back so that it is the next one returned by token()."""
        self.tokens.insert(0, token)

    def debug(self):
        """Print the state of the lexer on stdout."""
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def _read_string(self, line):
        """Read the string or character literal starting line.

        The text found after the closing quote is kept in self.line.
        Returns the 'string' token without its quotes, or None when the
        input ends before the closing quote.
        """
        end = line[0]
        line = line[1:]
        tok = ""
        found = 0
        while not found:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == end:
                    self.line = line[i+1:]
                    line = line[:i]
                    found = 1
                    break
                if line[i] == '\\':
                    # skip the escaped character
                    i = i + 1
                i = i + 1
            tok = tok + line
            if not found:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('string', tok)
        return self.last

    def _read_comment(self, line):
        """Read the /* */ comment starting line, possibly over many lines.

        The text found after the end of the comment is kept in
        self.line.  Returns the 'comment' token, its lines joined with
        newlines, or None when the input ends inside the comment.
        """
        line = line[2:]
        tok = ""
        found = 0
        while not found:
            i = line.find('*/')
            if i != -1:
                self.line = line[i+2:]
                # The character in front of the terminator is dropped as
                # well; when the line starts with the terminator this
                # keeps its '*'.  The comment parsers expect that layout.
                line = line[:i-1]
                found = 1
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if not found:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('comment', tok)
        return self.last

    def _cut_code(self, line):
        """Return the part of line preceding the first comment or quote.

        The remainder, if any, is saved in self.line for the next call
        of token().
        """
        i = 0
        l = len(line)
        while i < l:
            if line[i:i+2] == '//' or line[i:i+2] == '/*' or \
               line[i] == '"' or line[i] == "'":
                self.line = line[i:]
                return line[:i]
            i = i + 1
        return line

    def _tokenize(self, line):
        """Split a piece of code and append its tokens to self.tokens."""
        i = 0
        l = len(line)
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i = i + 1
            elif c in self._SEPARATORS:
                self.tokens.append(('sep', c))
                i = i + 1
            elif c in self._OPERATORS:
                if line[i:i+3] == '...':
                    self.tokens.append(('name', '...'))
                    i = i + 3
                elif i + 1 < l and line[i+1] in self._OPERATOR_TAILS:
                    self.tokens.append(('op', line[i:i+2]))
                    i = i + 2
                else:
                    self.tokens.append(('op', c))
                    i = i + 1
            else:
                # anything else starts a name running up to the next
                # blank, separator or operator character
                s = i
                while i < l and line[i] not in self._NAME_STOPS:
                    i = i + 1
                self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token, or None at the end of the input."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # every word of a preprocessor line is a 'preproc' token
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._read_string(line)
            if line[0:2] == '/*':
                return self._read_comment(line)
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._tokenize(self._cut_code(line))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
566
567class CParser:
568    """The C module parser"""
569    def __init__(self, filename, idx = None):
570        self.filename = filename
571	if len(filename) > 2 and filename[-2:] == '.h':
572	    self.is_header = 1
573	else:
574	    self.is_header = 0
575        self.input = open(filename)
576	self.lexer = CLexer(self.input)
577	if idx == None:
578	    self.index = index()
579	else:
580	    self.index = idx
581	self.top_comment = ""
582	self.last_comment = ""
583	self.comment = None
584	self.collect_ref = 0
585	self.no_error = 0
586	self.conditionals = []
587	self.defines = []
588
    def collect_references(self):
        """Set the collect_ref flag of this parser to 1."""
        self.collect_ref = 1
591
    def stop_error(self):
        """Set no_error, which makes warning() and error() do nothing."""
        self.no_error = 1
594
    def start_error(self):
        """Clear no_error so that warning() and error() report again."""
        self.no_error = 0
597
    def lineno(self):
        """Return the line number currently reported by the lexer."""
        return self.lexer.getlineno()
600
601    def index_add(self, name, module, static, type, info=None, extra = None):
602	if self.is_header == 1:
603	    self.index.add(name, module, module, static, type, self.lineno(),
604			   info, extra, self.conditionals)
605	else:
606	    self.index.add(name, None, module, static, type, self.lineno(),
607			   info, extra, self.conditionals)
608
609    def index_add_ref(self, name, module, static, type, info=None,
610                      extra = None):
611	if self.is_header == 1:
612	    self.index.add_ref(name, module, module, static, type,
613	                       self.lineno(), info, extra, self.conditionals)
614	else:
615	    self.index.add_ref(name, None, module, static, type, self.lineno(),
616			       info, extra, self.conditionals)
617
618    def warning(self, msg):
619        if self.no_error:
620	    return
621	print msg
622
623    def error(self, msg, token=-1):
624        if self.no_error:
625	    return
626
627        print "Parse Error: " + msg
628	if token != -1:
629	    print "Got token ", token
630	self.lexer.debug()
631	sys.exit(1)
632
633    def debug(self, msg, token=-1):
634        print "Debug: " + msg
635	if token != -1:
636	    print "Got token ", token
637	self.lexer.debug()
638
639    def parseTopComment(self, comment):
640	res = {}
641	lines = string.split(comment, "\n")
642	item = None
643	for line in lines:
644	    while line != "" and (line[0] == ' ' or line[0] == '\t'):
645		line = line[1:]
646	    while line != "" and line[0] == '*':
647		line = line[1:]
648	    while line != "" and (line[0] == ' ' or line[0] == '\t'):
649		line = line[1:]
650	    try:
651		(it, line) = string.split(line, ":", 1)
652		item = it
653		while line != "" and (line[0] == ' ' or line[0] == '\t'):
654		    line = line[1:]
655		if res.has_key(item):
656		    res[item] = res[item] + " " + line
657		else:
658		    res[item] = line
659	    except:
660		if item != None:
661		    if res.has_key(item):
662			res[item] = res[item] + " " + line
663		    else:
664			res[item] = line
665	self.index.info = res
666
667    def parseComment(self, token):
668        if self.top_comment == "":
669	    self.top_comment = token[1]
670	if self.comment == None or token[1][0] == '*':
671	    self.comment = token[1];
672	else:
673	    self.comment = self.comment + token[1]
674	token = self.lexer.token()
675
676        if string.find(self.comment, "DOC_DISABLE") != -1:
677	    self.stop_error()
678
679        if string.find(self.comment, "DOC_ENABLE") != -1:
680	    self.start_error()
681
682	return token
683
684    #
    # Parse the comment block associated with a typedef
686    #
687    def parseTypeComment(self, name, quiet = 0):
688        if name[0:2] == '__':
689	    quiet = 1
690
691        args = []
692	desc = ""
693
694        if self.comment == None:
695	    if not quiet:
696		self.warning("Missing comment for type %s" % (name))
697	    return((args, desc))
698        if self.comment[0] != '*':
699	    if not quiet:
700		self.warning("Missing * in type comment for %s" % (name))
701	    return((args, desc))
702	lines = string.split(self.comment, '\n')
703	if lines[0] == '*':
704	    del lines[0]
705	if lines[0] != "* %s:" % (name):
706	    if not quiet:
707		self.warning("Misformatted type comment for %s" % (name))
708		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
709	    return((args, desc))
710	del lines[0]
711	while len(lines) > 0 and lines[0] == '*':
712	    del lines[0]
713	desc = ""
714	while len(lines) > 0:
715	    l = lines[0]
716	    while len(l) > 0 and l[0] == '*':
717	        l = l[1:]
718	    l = string.strip(l)
719	    desc = desc + " " + l
720	    del lines[0]
721
722	desc = string.strip(desc)
723
724	if quiet == 0:
725	    if desc == "":
726	        self.warning("Type comment for %s lack description of the macro" % (name))
727
728	return(desc)
729    #
    # Parse the comment block associated with a macro
731    #
732    def parseMacroComment(self, name, quiet = 0):
733        if name[0:2] == '__':
734	    quiet = 1
735
736        args = []
737	desc = ""
738
739        if self.comment == None:
740	    if not quiet:
741		self.warning("Missing comment for macro %s" % (name))
742	    return((args, desc))
743        if self.comment[0] != '*':
744	    if not quiet:
745		self.warning("Missing * in macro comment for %s" % (name))
746	    return((args, desc))
747	lines = string.split(self.comment, '\n')
748	if lines[0] == '*':
749	    del lines[0]
750	if lines[0] != "* %s:" % (name):
751	    if not quiet:
752		self.warning("Misformatted macro comment for %s" % (name))
753		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
754	    return((args, desc))
755	del lines[0]
756	while lines[0] == '*':
757	    del lines[0]
758	while len(lines) > 0 and lines[0][0:3] == '* @':
759	    l = lines[0][3:]
760	    try:
761	        (arg, desc) = string.split(l, ':', 1)
762		desc=string.strip(desc)
763		arg=string.strip(arg)
764            except:
765		if not quiet:
766		    self.warning("Misformatted macro comment for %s" % (name))
767		    self.warning("  problem with '%s'" % (lines[0]))
768		del lines[0]
769		continue
770	    del lines[0]
771	    l = string.strip(lines[0])
772	    while len(l) > 2 and l[0:3] != '* @':
773	        while l[0] == '*':
774		    l = l[1:]
775		desc = desc + ' ' + string.strip(l)
776		del lines[0]
777		if len(lines) == 0:
778		    break
779		l = lines[0]
780            args.append((arg, desc))
781	while len(lines) > 0 and lines[0] == '*':
782	    del lines[0]
783	desc = ""
784	while len(lines) > 0:
785	    l = lines[0]
786	    while len(l) > 0 and l[0] == '*':
787	        l = l[1:]
788	    l = string.strip(l)
789	    desc = desc + " " + l
790	    del lines[0]
791
792	desc = string.strip(desc)
793
794	if quiet == 0:
795	    if desc == "":
796	        self.warning("Macro comment for %s lack description of the macro" % (name))
797
798	return((args, desc))
799
800     #
     # Parse a comment block and merge the information found in the
     # parameter descriptions; finally return a block as complete
     # as possible
804     #
805    def mergeFunctionComment(self, name, description, quiet = 0):
806        if name == 'main':
807	    quiet = 1
808        if name[0:2] == '__':
809	    quiet = 1
810
811	(ret, args) = description
812	desc = ""
813	retdesc = ""
814
815        if self.comment == None:
816	    if not quiet:
817		self.warning("Missing comment for function %s" % (name))
818	    return(((ret[0], retdesc), args, desc))
819        if self.comment[0] != '*':
820	    if not quiet:
821		self.warning("Missing * in function comment for %s" % (name))
822	    return(((ret[0], retdesc), args, desc))
823	lines = string.split(self.comment, '\n')
824	if lines[0] == '*':
825	    del lines[0]
826	if lines[0] != "* %s:" % (name):
827	    if not quiet:
828		self.warning("Misformatted function comment for %s" % (name))
829		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
830	    return(((ret[0], retdesc), args, desc))
831	del lines[0]
832	while lines[0] == '*':
833	    del lines[0]
834	nbargs = len(args)
835	while len(lines) > 0 and lines[0][0:3] == '* @':
836	    l = lines[0][3:]
837	    try:
838	        (arg, desc) = string.split(l, ':', 1)
839		desc=string.strip(desc)
840		arg=string.strip(arg)
841            except:
842		if not quiet:
843		    self.warning("Misformatted function comment for %s" % (name))
844		    self.warning("  problem with '%s'" % (lines[0]))
845		del lines[0]
846		continue
847	    del lines[0]
848	    l = string.strip(lines[0])
849	    while len(l) > 2 and l[0:3] != '* @':
850	        while l[0] == '*':
851		    l = l[1:]
852		desc = desc + ' ' + string.strip(l)
853		del lines[0]
854		if len(lines) == 0:
855		    break
856		l = lines[0]
857	    i = 0
858	    while i < nbargs:
859	        if args[i][1] == arg:
860		    args[i] = (args[i][0], arg, desc)
861		    break;
862		i = i + 1
863	    if i >= nbargs:
864		if not quiet:
865		    self.warning("Unable to find arg %s from function comment for %s" % (
866		       arg, name))
867	while len(lines) > 0 and lines[0] == '*':
868	    del lines[0]
869	desc = ""
870	while len(lines) > 0:
871	    l = lines[0]
872	    while len(l) > 0 and l[0] == '*':
873	        l = l[1:]
874	    l = string.strip(l)
875	    if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
876	        try:
877		    l = string.split(l, ' ', 1)[1]
878		except:
879		    l = ""
880		retdesc = string.strip(l)
881		del lines[0]
882		while len(lines) > 0:
883		    l = lines[0]
884		    while len(l) > 0 and l[0] == '*':
885			l = l[1:]
886		    l = string.strip(l)
887		    retdesc = retdesc + " " + l
888		    del lines[0]
889	    else:
890	        desc = desc + " " + l
891		del lines[0]
892
893	retdesc = string.strip(retdesc)
894	desc = string.strip(desc)
895
896	if quiet == 0:
897	     #
898	     # report missing comments
899	     #
900	    i = 0
901	    while i < nbargs:
902	        if args[i][2] == None and args[i][0] != "void" and \
903		   ((args[i][1] != None) or (args[i][1] == '')):
904		    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
905		i = i + 1
906	    if retdesc == "" and ret[0] != "void":
907		self.warning("Function comment for %s lacks description of return value" % (name))
908	    if desc == "":
909	        self.warning("Function comment for %s lacks description of the function" % (name))
910
911	return(((ret[0], retdesc), args, desc))
912
913    def parsePreproc(self, token):
914	if debug:
915	    print "=> preproc ", token, self.lexer.tokens
916        name = token[1]
917	if name == "#include":
918	    token = self.lexer.token()
919	    if token == None:
920	        return None
921	    if token[0] == 'preproc':
922		self.index_add(token[1], self.filename, not self.is_header,
923		                "include")
924		return self.lexer.token()
925	    return token
926	if name == "#define":
927	    token = self.lexer.token()
928	    if token == None:
929	        return None
930	    if token[0] == 'preproc':
931	         # TODO macros with arguments
932		name = token[1]
933	        lst = []
934		token = self.lexer.token()
935		while token != None and token[0] == 'preproc' and \
936		      token[1][0] != '#':
937		    lst.append(token[1])
938		    token = self.lexer.token()
939                try:
940		    name = string.split(name, '(') [0]
941                except:
942                    pass
943                info = self.parseMacroComment(name, not self.is_header)
944		self.index_add(name, self.filename, not self.is_header,
945		                "macro", info)
946		return token
947
948	#
949	# Processing of conditionals modified by Bill 1/1/05
950	#
951	# We process conditionals (i.e. tokens from #ifdef, #ifndef,
952	# #if, #else and #endif) for headers and mainline code,
953	# store the ones from the header in libxml2-api.xml, and later
954	# (in the routine merge_public) verify that the two (header and
955	# mainline code) agree.
956	#
957	# There is a small problem with processing the headers. Some of
958	# the variables are not concerned with enabling / disabling of
959	# library functions (e.g. '__XML_PARSER_H__'), and we don't want
960	# them to be included in libxml2-api.xml, or involved in
961	# the check between the header and the mainline code.  To
962	# accomplish this, we ignore any conditional which doesn't include
963	# the string 'ENABLED'
964	#
965	if name == "#ifdef":
966	    apstr = self.lexer.tokens[0][1]
967	    try:
968	        self.defines.append(apstr)
969		if string.find(apstr, 'ENABLED') != -1:
970		    self.conditionals.append("defined(%s)" % apstr)
971	    except:
972	        pass
973	elif name == "#ifndef":
974	    apstr = self.lexer.tokens[0][1]
975	    try:
976	        self.defines.append(apstr)
977		if string.find(apstr, 'ENABLED') != -1:
978		    self.conditionals.append("!defined(%s)" % apstr)
979	    except:
980	        pass
981	elif name == "#if":
982	    apstr = ""
983	    for tok in self.lexer.tokens:
984	        if apstr != "":
985		    apstr = apstr + " "
986	        apstr = apstr + tok[1]
987	    try:
988	        self.defines.append(apstr)
989		if string.find(apstr, 'ENABLED') != -1:
990		    self.conditionals.append(apstr)
991	    except:
992	        pass
993	elif name == "#else":
994	    if self.conditionals != [] and \
995	       string.find(self.defines[-1], 'ENABLED') != -1:
996	        self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
997	elif name == "#endif":
998	    if self.conditionals != [] and \
999	       string.find(self.defines[-1], 'ENABLED') != -1:
1000	        self.conditionals = self.conditionals[:-1]
1001	    self.defines = self.defines[:-1]
1002	token = self.lexer.token()
1003	while token != None and token[0] == 'preproc' and \
1004	    token[1][0] != '#':
1005	    token = self.lexer.token()
1006	return token
1007
1008     #
1009     # token acquisition on top of the lexer, it handle internally
1010     # preprocessor and comments since they are logically not part of
1011     # the program structure.
1012     #
1013    def token(self):
1014        global ignored_words
1015
1016        token = self.lexer.token()
1017	while token != None:
1018	    if token[0] == 'comment':
1019		token = self.parseComment(token)
1020		continue
1021	    elif token[0] == 'preproc':
1022		token = self.parsePreproc(token)
1023		continue
1024	    elif token[0] == "name" and token[1] == "__const":
1025	        token = ("name", "const")
1026		return token
1027	    elif token[0] == "name" and token[1] == "__attribute":
1028		token = self.lexer.token()
1029		while token != None and token[1] != ";":
1030		    token = self.lexer.token()
1031		return token
1032	    elif token[0] == "name" and ignored_words.has_key(token[1]):
1033	        (n, info) = ignored_words[token[1]]
1034		i = 0
1035		while i < n:
1036		    token = self.lexer.token()
1037		    i = i + 1
1038		token = self.lexer.token()
1039		continue
1040	    else:
1041	        if debug:
1042		    print "=> ", token
1043	        return token
1044	return None
1045
1046     #
1047     # Parse a typedef, it records the type and its name.
1048     #
1049    def parseTypedef(self, token):
1050        if token == None:
1051	    return None
1052	token = self.parseType(token)
1053	if token == None:
1054	    self.error("parsing typedef")
1055	    return None
1056	base_type = self.type
1057	type = base_type
1058	 #self.debug("end typedef type", token)
1059	while token != None:
1060	    if token[0] == "name":
1061		name = token[1]
1062		signature = self.signature
1063		if signature != None:
1064		    type = string.split(type, '(')[0]
1065		    d = self.mergeFunctionComment(name,
1066			    ((type, None), signature), 1)
1067		    self.index_add(name, self.filename, not self.is_header,
1068				    "functype", d)
1069		else:
1070		    if base_type == "struct":
1071			self.index_add(name, self.filename, not self.is_header,
1072					"struct", type)
1073			base_type = "struct " + name
1074	            else:
1075			# TODO report missing or misformatted comments
1076			info = self.parseTypeComment(name, 1)
1077			self.index_add(name, self.filename, not self.is_header,
1078		                    "typedef", type, info)
1079		token = self.token()
1080	    else:
1081		self.error("parsing typedef: expecting a name")
1082		return token
1083	     #self.debug("end typedef", token)
1084	    if token != None and token[0] == 'sep' and token[1] == ',':
1085	        type = base_type
1086	        token = self.token()
1087		while token != None and token[0] == "op":
1088		    type = type + token[1]
1089		    token = self.token()
1090	    elif token != None and token[0] == 'sep' and token[1] == ';':
1091	        break;
1092	    elif token != None and token[0] == 'name':
1093	        type = base_type
1094	        continue;
1095	    else:
1096		self.error("parsing typedef: expecting ';'", token)
1097		return token
1098	token = self.token()
1099	return token
1100
1101     #
1102     # Parse a C code block, used for functions it parse till
1103     # the balancing } included
1104     #
1105    def parseBlock(self, token):
1106        while token != None:
1107	    if token[0] == "sep" and token[1] == "{":
1108	        token = self.token()
1109		token = self.parseBlock(token)
1110	    elif token[0] == "sep" and token[1] == "}":
1111	        self.comment = None
1112	        token = self.token()
1113		return token
1114	    else:
1115	        if self.collect_ref == 1:
1116		    oldtok = token
1117		    token = self.token()
1118		    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
1119		        if token[0] == "sep" and token[1] == "(":
1120			    self.index_add_ref(oldtok[1], self.filename,
1121			                        0, "function")
1122			    token = self.token()
1123			elif token[0] == "name":
1124			    token = self.token()
1125			    if token[0] == "sep" and (token[1] == ";" or
1126			       token[1] == "," or token[1] == "="):
1127				self.index_add_ref(oldtok[1], self.filename,
1128						    0, "type")
1129		    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
1130			self.index_add_ref(oldtok[1], self.filename,
1131					    0, "typedef")
1132		    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
1133			self.index_add_ref(oldtok[1], self.filename,
1134					    0, "typedef")
1135
1136		else:
1137		    token = self.token()
1138	return token
1139
1140     #
1141     # Parse a C struct definition till the balancing }
1142     #
1143    def parseStruct(self, token):
1144        fields = []
1145	 #self.debug("start parseStruct", token)
1146        while token != None:
1147	    if token[0] == "sep" and token[1] == "{":
1148	        token = self.token()
1149		token = self.parseTypeBlock(token)
1150	    elif token[0] == "sep" and token[1] == "}":
1151		self.struct_fields = fields
1152		 #self.debug("end parseStruct", token)
1153		 #print fields
1154	        token = self.token()
1155		return token
1156	    else:
1157	        base_type = self.type
1158		 #self.debug("before parseType", token)
1159		token = self.parseType(token)
1160		 #self.debug("after parseType", token)
1161		if token != None and token[0] == "name":
1162		    fname = token[1]
1163		    token = self.token()
1164		    if token[0] == "sep" and token[1] == ";":
1165		        self.comment = None
1166		        token = self.token()
1167			fields.append((self.type, fname, self.comment))
1168			self.comment = None
1169		    else:
1170		        self.error("parseStruct: expecting ;", token)
1171		elif token != None and token[0] == "sep" and token[1] == "{":
1172		    token = self.token()
1173		    token = self.parseTypeBlock(token)
1174		    if token != None and token[0] == "name":
1175			token = self.token()
1176		    if token != None and token[0] == "sep" and token[1] == ";":
1177			token = self.token()
1178		    else:
1179		        self.error("parseStruct: expecting ;", token)
1180		else:
1181		    self.error("parseStruct: name", token)
1182		    token = self.token()
1183		self.type = base_type;
1184        self.struct_fields = fields
1185	 #self.debug("end parseStruct", token)
1186	 #print fields
1187	return token
1188
1189     #
1190     # Parse a C enum block, parse till the balancing }
1191     #
1192    def parseEnumBlock(self, token):
1193        self.enums = []
1194	name = None
1195	self.comment = None
1196	comment = ""
1197	value = "0"
1198        while token != None:
1199	    if token[0] == "sep" and token[1] == "{":
1200	        token = self.token()
1201		token = self.parseTypeBlock(token)
1202	    elif token[0] == "sep" and token[1] == "}":
1203		if name != None:
1204		    if self.comment != None:
1205			comment = self.comment
1206			self.comment = None
1207		    self.enums.append((name, value, comment))
1208	        token = self.token()
1209		return token
1210	    elif token[0] == "name":
1211		    if name != None:
1212			if self.comment != None:
1213			    comment = string.strip(self.comment)
1214			    self.comment = None
1215			self.enums.append((name, value, comment))
1216		    name = token[1]
1217		    comment = ""
1218		    token = self.token()
1219		    if token[0] == "op" and token[1][0] == "=":
1220		        value = ""
1221		        if len(token[1]) > 1:
1222			    value = token[1][1:]
1223		        token = self.token()
1224		        while token[0] != "sep" or (token[1] != ',' and
1225			      token[1] != '}'):
1226			    value = value + token[1]
1227			    token = self.token()
1228		    else:
1229		        try:
1230			    value = "%d" % (int(value) + 1)
1231			except:
1232			    self.warning("Failed to compute value of enum %s" % (name))
1233			    value=""
1234		    if token[0] == "sep" and token[1] == ",":
1235			token = self.token()
1236	    else:
1237	        token = self.token()
1238	return token
1239
1240     #
1241     # Parse a C definition block, used for structs it parse till
1242     # the balancing }
1243     #
1244    def parseTypeBlock(self, token):
1245        while token != None:
1246	    if token[0] == "sep" and token[1] == "{":
1247	        token = self.token()
1248		token = self.parseTypeBlock(token)
1249	    elif token[0] == "sep" and token[1] == "}":
1250	        token = self.token()
1251		return token
1252	    else:
1253	        token = self.token()
1254	return token
1255
1256     #
1257     # Parse a type: the fact that the type name can either occur after
1258     #    the definition or within the definition makes it a little harder
1259     #    if inside, the name token is pushed back before returning
1260     #
1261    def parseType(self, token):
1262        self.type = ""
1263	self.struct_fields = []
1264        self.signature = None
1265	if token == None:
1266	    return token
1267
1268	while token[0] == "name" and (
1269	      token[1] == "const" or \
1270	      token[1] == "unsigned" or \
1271	      token[1] == "signed"):
1272	    if self.type == "":
1273	        self.type = token[1]
1274	    else:
1275	        self.type = self.type + " " + token[1]
1276	    token = self.token()
1277
1278        if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1279	    if self.type == "":
1280	        self.type = token[1]
1281	    else:
1282	        self.type = self.type + " " + token[1]
1283	    if token[0] == "name" and token[1] == "int":
1284		if self.type == "":
1285		    self.type = tmp[1]
1286		else:
1287		    self.type = self.type + " " + tmp[1]
1288
1289        elif token[0] == "name" and token[1] == "struct":
1290	    if self.type == "":
1291	        self.type = token[1]
1292	    else:
1293	        self.type = self.type + " " + token[1]
1294	    token = self.token()
1295	    nametok = None
1296	    if token[0] == "name":
1297	        nametok = token
1298		token = self.token()
1299	    if token != None and token[0] == "sep" and token[1] == "{":
1300		token = self.token()
1301		token = self.parseStruct(token)
1302	    elif token != None and token[0] == "op" and token[1] == "*":
1303	        self.type = self.type + " " + nametok[1] + " *"
1304		token = self.token()
1305		while token != None and token[0] == "op" and token[1] == "*":
1306		    self.type = self.type + " *"
1307		    token = self.token()
1308		if token[0] == "name":
1309		    nametok = token
1310		    token = self.token()
1311		else:
1312		    self.error("struct : expecting name", token)
1313		    return token
1314	    elif token != None and token[0] == "name" and nametok != None:
1315	        self.type = self.type + " " + nametok[1]
1316		return token
1317
1318	    if nametok != None:
1319		self.lexer.push(token)
1320		token = nametok
1321	    return token
1322
1323        elif token[0] == "name" and token[1] == "enum":
1324	    if self.type == "":
1325	        self.type = token[1]
1326	    else:
1327	        self.type = self.type + " " + token[1]
1328	    self.enums = []
1329	    token = self.token()
1330	    if token != None and token[0] == "sep" and token[1] == "{":
1331		token = self.token()
1332		token = self.parseEnumBlock(token)
1333	    else:
1334		self.error("parsing enum: expecting '{'", token)
1335	    enum_type = None
1336	    if token != None and token[0] != "name":
1337	        self.lexer.push(token)
1338	        token = ("name", "enum")
1339	    else:
1340	        enum_type = token[1]
1341	    for enum in self.enums:
1342		self.index_add(enum[0], self.filename,
1343			       not self.is_header, "enum",
1344			       (enum[1], enum[2], enum_type))
1345	    return token
1346
1347	elif token[0] == "name":
1348	    if self.type == "":
1349	        self.type = token[1]
1350	    else:
1351	        self.type = self.type + " " + token[1]
1352	else:
1353	    self.error("parsing type %s: expecting a name" % (self.type),
1354	               token)
1355	    return token
1356	token = self.token()
1357        while token != None and (token[0] == "op" or
1358	      token[0] == "name" and token[1] == "const"):
1359	    self.type = self.type + " " + token[1]
1360	    token = self.token()
1361
1362	 #
1363	 # if there is a parenthesis here, this means a function type
1364	 #
1365	if token != None and token[0] == "sep" and token[1] == '(':
1366	    self.type = self.type + token[1]
1367	    token = self.token()
1368	    while token != None and token[0] == "op" and token[1] == '*':
1369	        self.type = self.type + token[1]
1370		token = self.token()
1371	    if token == None or token[0] != "name" :
1372		self.error("parsing function type, name expected", token);
1373	        return token
1374	    self.type = self.type + token[1]
1375	    nametok = token
1376	    token = self.token()
1377	    if token != None and token[0] == "sep" and token[1] == ')':
1378		self.type = self.type + token[1]
1379		token = self.token()
1380		if token != None and token[0] == "sep" and token[1] == '(':
1381		    token = self.token()
1382		    type = self.type;
1383		    token = self.parseSignature(token);
1384		    self.type = type;
1385		else:
1386		    self.error("parsing function type, '(' expected", token);
1387		    return token
1388	    else:
1389	        self.error("parsing function type, ')' expected", token);
1390		return token
1391	    self.lexer.push(token)
1392	    token = nametok
1393	    return token
1394
1395         #
1396	 # do some lookahead for arrays
1397	 #
1398	if token != None and token[0] == "name":
1399	    nametok = token
1400	    token = self.token()
1401	    if token != None and token[0] == "sep" and token[1] == '[':
1402	        self.type = self.type + nametok[1]
1403		while token != None and token[0] == "sep" and token[1] == '[':
1404		    self.type = self.type + token[1]
1405		    token = self.token()
1406		    while token != None and token[0] != 'sep' and \
1407		          token[1] != ']' and token[1] != ';':
1408			self.type = self.type + token[1]
1409			token = self.token()
1410		if token != None and token[0] == 'sep' and token[1] == ']':
1411		    self.type = self.type + token[1]
1412		    token = self.token()
1413		else:
1414		    self.error("parsing array type, ']' expected", token);
1415		    return token
1416	    elif token != None and token[0] == "sep" and token[1] == ':':
1417	         # remove :12 in case it's a limited int size
1418		token = self.token()
1419		token = self.token()
1420	    self.lexer.push(token)
1421	    token = nametok
1422
1423	return token
1424
1425     #
1426     # Parse a signature: '(' has been parsed and we scan the type definition
1427     #    up to the ')' included
1428    def parseSignature(self, token):
1429        signature = []
1430	if token != None and token[0] == "sep" and token[1] == ')':
1431	    self.signature = []
1432	    token = self.token()
1433	    return token
1434	while token != None:
1435	    token = self.parseType(token)
1436	    if token != None and token[0] == "name":
1437	        signature.append((self.type, token[1], None))
1438		token = self.token()
1439	    elif token != None and token[0] == "sep" and token[1] == ',':
1440		token = self.token()
1441		continue
1442	    elif token != None and token[0] == "sep" and token[1] == ')':
1443	         # only the type was provided
1444		if self.type == "...":
1445		    signature.append((self.type, "...", None))
1446		else:
1447		    signature.append((self.type, None, None))
1448	    if token != None and token[0] == "sep":
1449	        if token[1] == ',':
1450		    token = self.token()
1451		    continue
1452		elif token[1] == ')':
1453		    token = self.token()
1454		    break
1455	self.signature = signature
1456	return token
1457
1458     #
1459     # Parse a global definition, be it a type, variable or function
1460     # the extern "C" blocks are a bit nasty and require it to recurse.
1461     #
1462    def parseGlobal(self, token):
1463        static = 0
1464        if token[1] == 'extern':
1465	    token = self.token()
1466	    if token == None:
1467	        return token
1468	    if token[0] == 'string':
1469	        if token[1] == 'C':
1470		    token = self.token()
1471		    if token == None:
1472			return token
1473		    if token[0] == 'sep' and token[1] == "{":
1474		        token = self.token()
1475#			 print 'Entering extern "C line ', self.lineno()
1476			while token != None and (token[0] != 'sep' or
1477			      token[1] != "}"):
1478			    if token[0] == 'name':
1479				token = self.parseGlobal(token)
1480			    else:
1481				self.error(
1482				 "token %s %s unexpected at the top level" % (
1483					token[0], token[1]))
1484				token = self.parseGlobal(token)
1485#			 print 'Exiting extern "C" line', self.lineno()
1486			token = self.token()
1487			return token
1488		else:
1489		    return token
1490	elif token[1] == 'static':
1491	    static = 1
1492	    token = self.token()
1493	    if token == None or  token[0] != 'name':
1494	        return token
1495
1496	if token[1] == 'typedef':
1497	    token = self.token()
1498	    return self.parseTypedef(token)
1499	else:
1500	    token = self.parseType(token)
1501	    type_orig = self.type
1502	if token == None or token[0] != "name":
1503	    return token
1504	type = type_orig
1505	self.name = token[1]
1506	token = self.token()
1507	while token != None and (token[0] == "sep" or token[0] == "op"):
1508	    if token[0] == "sep":
1509		if token[1] == "[":
1510		    type = type + token[1]
1511		    token = self.token()
1512		    while token != None and (token[0] != "sep" or \
1513		          token[1] != ";"):
1514			type = type + token[1]
1515			token = self.token()
1516
1517	    if token != None and token[0] == "op" and token[1] == "=":
1518		 #
1519		 # Skip the initialization of the variable
1520		 #
1521		token = self.token()
1522		if token[0] == 'sep' and token[1] == '{':
1523		    token = self.token()
1524		    token = self.parseBlock(token)
1525		else:
1526		    self.comment = None
1527		    while token != None and (token[0] != "sep" or \
1528			  (token[1] != ';' and token[1] != ',')):
1529			    token = self.token()
1530		self.comment = None
1531		if token == None or token[0] != "sep" or (token[1] != ';' and
1532		   token[1] != ','):
1533		    self.error("missing ';' or ',' after value")
1534
1535	    if token != None and token[0] == "sep":
1536		if token[1] == ";":
1537		    self.comment = None
1538		    token = self.token()
1539		    if type == "struct":
1540		        self.index_add(self.name, self.filename,
1541			     not self.is_header, "struct", self.struct_fields)
1542		    else:
1543			self.index_add(self.name, self.filename,
1544			     not self.is_header, "variable", type)
1545		    break
1546		elif token[1] == "(":
1547		    token = self.token()
1548		    token = self.parseSignature(token)
1549		    if token == None:
1550			return None
1551		    if token[0] == "sep" and token[1] == ";":
1552		        d = self.mergeFunctionComment(self.name,
1553				((type, None), self.signature), 1)
1554			self.index_add(self.name, self.filename, static,
1555			                "function", d)
1556			token = self.token()
1557		    elif token[0] == "sep" and token[1] == "{":
1558		        d = self.mergeFunctionComment(self.name,
1559				((type, None), self.signature), static)
1560			self.index_add(self.name, self.filename, static,
1561			                "function", d)
1562			token = self.token()
1563			token = self.parseBlock(token);
1564		elif token[1] == ',':
1565		    self.comment = None
1566		    self.index_add(self.name, self.filename, static,
1567		                    "variable", type)
1568		    type = type_orig
1569		    token = self.token()
1570		    while token != None and token[0] == "sep":
1571		        type = type + token[1]
1572			token = self.token()
1573		    if token != None and token[0] == "name":
1574		        self.name = token[1]
1575			token = self.token()
1576		else:
1577		    break
1578
1579	return token
1580
1581    def parse(self):
1582        self.warning("Parsing %s" % (self.filename))
1583        token = self.token()
1584	while token != None:
1585            if token[0] == 'name':
1586	        token = self.parseGlobal(token)
1587            else:
1588	        self.error("token %s %s unexpected at the top level" % (
1589		       token[0], token[1]))
1590		token = self.parseGlobal(token)
1591		return
1592	self.parseTopComment(self.top_comment)
1593        return self.index
1594
1595
1596class docBuilder:
1597    """A documentation builder"""
1598    def __init__(self, name, directories=['.'], excludes=[]):
1599        self.name = name
1600        self.directories = directories
1601	self.excludes = excludes + ignored_files.keys()
1602	self.modules = {}
1603	self.headers = {}
1604	self.idx = index()
1605        self.xref = {}
1606	self.index = {}
1607	if name == 'libxml2':
1608	    self.basename = 'libxml'
1609	else:
1610	    self.basename = name
1611
1612    def indexString(self, id, str):
1613	if str == None:
1614	    return
1615	str = string.replace(str, "'", ' ')
1616	str = string.replace(str, '"', ' ')
1617	str = string.replace(str, "/", ' ')
1618	str = string.replace(str, '*', ' ')
1619	str = string.replace(str, "[", ' ')
1620	str = string.replace(str, "]", ' ')
1621	str = string.replace(str, "(", ' ')
1622	str = string.replace(str, ")", ' ')
1623	str = string.replace(str, "<", ' ')
1624	str = string.replace(str, '>', ' ')
1625	str = string.replace(str, "&", ' ')
1626	str = string.replace(str, '#', ' ')
1627	str = string.replace(str, ",", ' ')
1628	str = string.replace(str, '.', ' ')
1629	str = string.replace(str, ';', ' ')
1630	tokens = string.split(str)
1631	for token in tokens:
1632	    try:
1633		c = token[0]
1634		if string.find(string.letters, c) < 0:
1635		    pass
1636		elif len(token) < 3:
1637		    pass
1638		else:
1639		    lower = string.lower(token)
1640		    # TODO: generalize this a bit
1641		    if lower == 'and' or lower == 'the':
1642			pass
1643		    elif self.xref.has_key(token):
1644			self.xref[token].append(id)
1645		    else:
1646			self.xref[token] = [id]
1647	    except:
1648		pass
1649
1650    def analyze(self):
1651        print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
1652	self.idx.analyze()
1653
1654    def scanHeaders(self):
1655	for header in self.headers.keys():
1656	    parser = CParser(header)
1657	    idx = parser.parse()
1658	    self.headers[header] = idx;
1659	    self.idx.merge(idx)
1660
1661    def scanModules(self):
1662	for module in self.modules.keys():
1663	    parser = CParser(module)
1664	    idx = parser.parse()
1665	    # idx.analyze()
1666	    self.modules[module] = idx
1667	    self.idx.merge_public(idx)
1668
1669    def scan(self):
1670        for directory in self.directories:
1671	    files = glob.glob(directory + "/*.c")
1672	    for file in files:
1673	        skip = 0
1674		for excl in self.excludes:
1675		    if string.find(file, excl) != -1:
1676		        skip = 1;
1677			break
1678		if skip == 0:
1679		    self.modules[file] = None;
1680	    files = glob.glob(directory + "/*.h")
1681	    for file in files:
1682	        skip = 0
1683		for excl in self.excludes:
1684		    if string.find(file, excl) != -1:
1685		        skip = 1;
1686			break
1687		if skip == 0:
1688		    self.headers[file] = None;
1689	self.scanHeaders()
1690	self.scanModules()
1691
1692    def modulename_file(self, file):
1693        module = os.path.basename(file)
1694	if module[-2:] == '.h':
1695	    module = module[:-2]
1696	elif module[-2:] == '.c':
1697	    module = module[:-2]
1698	return module
1699
1700    def serialize_enum(self, output, name):
1701        id = self.idx.enums[name]
1702        output.write("    <enum name='%s' file='%s'" % (name,
1703	             self.modulename_file(id.header)))
1704	if id.info != None:
1705	    info = id.info
1706	    if info[0] != None and info[0] != '':
1707	        try:
1708		    val = eval(info[0])
1709		except:
1710		    val = info[0]
1711		output.write(" value='%s'" % (val));
1712	    if info[2] != None and info[2] != '':
1713		output.write(" type='%s'" % info[2]);
1714	    if info[1] != None and info[1] != '':
1715		output.write(" info='%s'" % escape(info[1]));
1716        output.write("/>\n")
1717
1718    def serialize_macro(self, output, name):
1719        id = self.idx.macros[name]
1720        output.write("    <macro name='%s' file='%s'>\n" % (name,
1721	             self.modulename_file(id.header)))
1722	if id.info != None:
1723            try:
1724		(args, desc) = id.info
1725		if desc != None and desc != "":
1726		    output.write("      <info>%s</info>\n" % (escape(desc)))
1727		    self.indexString(name, desc)
1728		for arg in args:
1729		    (name, desc) = arg
1730		    if desc != None and desc != "":
1731			output.write("      <arg name='%s' info='%s'/>\n" % (
1732				     name, escape(desc)))
1733			self.indexString(name, desc)
1734		    else:
1735			output.write("      <arg name='%s'/>\n" % (name))
1736            except:
1737                pass
1738        output.write("    </macro>\n")
1739
1740    def serialize_typedef(self, output, name):
1741        id = self.idx.typedefs[name]
1742	if id.info[0:7] == 'struct ':
1743	    output.write("    <struct name='%s' file='%s' type='%s'" % (
1744	             name, self.modulename_file(id.header), id.info))
1745	    name = id.info[7:]
1746	    if self.idx.structs.has_key(name) and ( \
1747	       type(self.idx.structs[name].info) == type(()) or
1748		type(self.idx.structs[name].info) == type([])):
1749	        output.write(">\n");
1750		try:
1751		    for field in self.idx.structs[name].info:
1752			desc = field[2]
1753			self.indexString(name, desc)
1754			if desc == None:
1755			    desc = ''
1756			else:
1757			    desc = escape(desc)
1758			output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
1759		except:
1760		    print "Failed to serialize struct %s" % (name)
1761		output.write("    </struct>\n")
1762	    else:
1763	        output.write("/>\n");
1764	else :
1765	    output.write("    <typedef name='%s' file='%s' type='%s'" % (
1766	                 name, self.modulename_file(id.header), id.info))
1767            try:
1768		desc = id.extra
1769		if desc != None and desc != "":
1770		    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
1771		    output.write("    </typedef>\n")
1772		else:
1773		    output.write("/>\n")
1774	    except:
1775		output.write("/>\n")
1776
1777    def serialize_variable(self, output, name):
1778        id = self.idx.variables[name]
1779	if id.info != None:
1780	    output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
1781		    name, self.modulename_file(id.header), id.info))
1782	else:
1783	    output.write("    <variable name='%s' file='%s'/>\n" % (
1784	            name, self.modulename_file(id.header)))
1785
1786    def serialize_function(self, output, name):
1787        id = self.idx.functions[name]
1788	if name == debugsym:
1789	    print "=>", id
1790
1791        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (id.type,
1792	             name, self.modulename_file(id.header),
1793		     self.modulename_file(id.module)))
1794	#
1795	# Processing of conditionals modified by Bill 1/1/05
1796	#
1797	if id.conditionals != None:
1798	    apstr = ""
1799	    for cond in id.conditionals:
1800	        if apstr != "":
1801		    apstr = apstr + " &amp;&amp; "
1802		apstr = apstr + cond
1803	    output.write("      <cond>%s</cond>\n"% (apstr));
1804	try:
1805	    (ret, params, desc) = id.info
1806	    if (desc == None or desc == '') and \
1807	       name[0:9] != "xmlThrDef" and name != "xmlDllMain":
1808	        print "%s %s from %s has no description" % (id.type, name,
1809		       self.modulename_file(id.module))
1810
1811	    output.write("      <info>%s</info>\n" % (escape(desc)))
1812	    self.indexString(name, desc)
1813	    if ret[0] != None:
1814	        if ret[0] == "void":
1815		    output.write("      <return type='void'/>\n")
1816		else:
1817		    output.write("      <return type='%s' info='%s'/>\n" % (
1818			     ret[0], escape(ret[1])))
1819		    self.indexString(name, ret[1])
1820	    for param in params:
1821	        if param[0] == 'void':
1822		    continue
1823	        if param[2] == None:
1824		    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
1825		else:
1826		    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
1827		    self.indexString(name, param[2])
1828	except:
1829	    print "Failed to save function %s info: " % name, `id.info`
1830        output.write("    </%s>\n" % (id.type))
1831
1832    def serialize_exports(self, output, file):
1833        module = self.modulename_file(file)
1834	output.write("    <file name='%s'>\n" % (module))
1835	dict = self.headers[file]
1836	if dict.info != None:
1837	    for data in ('Summary', 'Description', 'Author'):
1838		try:
1839		    output.write("     <%s>%s</%s>\n" % (
1840		                 string.lower(data),
1841				 escape(dict.info[data]),
1842				 string.lower(data)))
1843		except:
1844		    print "Header %s lacks a %s description" % (module, data)
1845	    if dict.info.has_key('Description'):
1846	        desc = dict.info['Description']
1847		if string.find(desc, "DEPRECATED") != -1:
1848		    output.write("     <deprecated/>\n")
1849
1850        ids = dict.macros.keys()
1851	ids.sort()
1852	for id in uniq(ids):
1853	    # Macros are sometime used to masquerade other types.
1854	    if dict.functions.has_key(id):
1855	        continue
1856	    if dict.variables.has_key(id):
1857	        continue
1858	    if dict.typedefs.has_key(id):
1859	        continue
1860	    if dict.structs.has_key(id):
1861	        continue
1862	    if dict.enums.has_key(id):
1863	        continue
1864	    output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
1865        ids = dict.enums.keys()
1866	ids.sort()
1867	for id in uniq(ids):
1868	    output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
1869        ids = dict.typedefs.keys()
1870	ids.sort()
1871	for id in uniq(ids):
1872	    output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
1873        ids = dict.structs.keys()
1874	ids.sort()
1875	for id in uniq(ids):
1876	    output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
1877        ids = dict.variables.keys()
1878	ids.sort()
1879	for id in uniq(ids):
1880	    output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
1881        ids = dict.functions.keys()
1882	ids.sort()
1883	for id in uniq(ids):
1884	    output.write("     <exports symbol='%s' type='function'/>\n" % (id))
1885	output.write("    </file>\n")
1886
1887    def serialize_xrefs_files(self, output):
1888        headers = self.headers.keys()
1889        headers.sort()
1890        for file in headers:
1891	    module = self.modulename_file(file)
1892	    output.write("    <file name='%s'>\n" % (module))
1893	    dict = self.headers[file]
1894	    ids = uniq(dict.functions.keys() + dict.variables.keys() + \
1895		  dict.macros.keys() + dict.typedefs.keys() + \
1896		  dict.structs.keys() + dict.enums.keys())
1897	    ids.sort()
1898	    for id in ids:
1899		output.write("      <ref name='%s'/>\n" % (id))
1900	    output.write("    </file>\n")
1901        pass
1902
1903    def serialize_xrefs_functions(self, output):
1904        funcs = {}
1905	for name in self.idx.functions.keys():
1906	    id = self.idx.functions[name]
1907	    try:
1908		(ret, params, desc) = id.info
1909		for param in params:
1910		    if param[0] == 'void':
1911			continue
1912		    if funcs.has_key(param[0]):
1913		        funcs[param[0]].append(name)
1914		    else:
1915		        funcs[param[0]] = [name]
1916	    except:
1917	        pass
1918	typ = funcs.keys()
1919	typ.sort()
1920	for type in typ:
1921	    if type == '' or type == 'void' or type == "int" or \
1922	       type == "char *" or type == "const char *" :
1923	        continue
1924	    output.write("    <type name='%s'>\n" % (type))
1925	    ids = funcs[type]
1926	    ids.sort()
1927	    pid = ''	# not sure why we have dups, but get rid of them!
1928	    for id in ids:
1929	        if id != pid:
1930	            output.write("      <ref name='%s'/>\n" % (id))
1931		    pid = id
1932	    output.write("    </type>\n")
1933
1934    def serialize_xrefs_constructors(self, output):
1935        funcs = {}
1936	for name in self.idx.functions.keys():
1937	    id = self.idx.functions[name]
1938	    try:
1939		(ret, params, desc) = id.info
1940		if ret[0] == "void":
1941		    continue
1942		if funcs.has_key(ret[0]):
1943		    funcs[ret[0]].append(name)
1944		else:
1945		    funcs[ret[0]] = [name]
1946	    except:
1947	        pass
1948	typ = funcs.keys()
1949	typ.sort()
1950	for type in typ:
1951	    if type == '' or type == 'void' or type == "int" or \
1952	       type == "char *" or type == "const char *" :
1953	        continue
1954	    output.write("    <type name='%s'>\n" % (type))
1955	    ids = funcs[type]
1956	    ids.sort()
1957	    for id in ids:
1958	        output.write("      <ref name='%s'/>\n" % (id))
1959	    output.write("    </type>\n")
1960
1961    def serialize_xrefs_alpha(self, output):
1962	letter = None
1963	ids = self.idx.identifiers.keys()
1964	ids.sort()
1965	for id in ids:
1966	    if id[0] != letter:
1967		if letter != None:
1968		    output.write("    </letter>\n")
1969		letter = id[0]
1970		output.write("    <letter name='%s'>\n" % (letter))
1971	    output.write("      <ref name='%s'/>\n" % (id))
1972	if letter != None:
1973	    output.write("    </letter>\n")
1974
1975    def serialize_xrefs_references(self, output):
1976        typ = self.idx.identifiers.keys()
1977	typ.sort()
1978	for id in typ:
1979	    idf = self.idx.identifiers[id]
1980	    module = idf.header
1981	    output.write("    <reference name='%s' href='%s'/>\n" % (id,
1982	                 'html/' + self.basename + '-' +
1983		         self.modulename_file(module) + '.html#' +
1984			 id))
1985
1986    def serialize_xrefs_index(self, output):
1987        index = self.xref
1988	typ = index.keys()
1989	typ.sort()
1990	letter = None
1991	count = 0
1992	chunk = 0
1993	chunks = []
1994	for id in typ:
1995	    if len(index[id]) > 30:
1996		continue
1997	    if id[0] != letter:
1998		if letter == None or count > 200:
1999		    if letter != None:
2000			output.write("      </letter>\n")
2001			output.write("    </chunk>\n")
2002			count = 0
2003			chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
2004		    output.write("    <chunk name='chunk%s'>\n" % (chunk))
2005		    first_letter = id[0]
2006		    chunk = chunk + 1
2007		elif letter != None:
2008		    output.write("      </letter>\n")
2009		letter = id[0]
2010		output.write("      <letter name='%s'>\n" % (letter))
2011	    output.write("        <word name='%s'>\n" % (id))
2012	    tokens = index[id];
2013	    tokens.sort()
2014	    tok = None
2015	    for token in tokens:
2016		if tok == token:
2017		    continue
2018		tok = token
2019		output.write("          <ref name='%s'/>\n" % (token))
2020		count = count + 1
2021	    output.write("        </word>\n")
2022	if letter != None:
2023	    output.write("      </letter>\n")
2024	    output.write("    </chunk>\n")
2025	    if count != 0:
2026	        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
2027	    output.write("    <chunks>\n")
2028	    for ch in chunks:
2029		output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
2030			     ch[0], ch[1], ch[2]))
2031	    output.write("    </chunks>\n")
2032
2033    def serialize_xrefs(self, output):
2034	output.write("  <references>\n")
2035	self.serialize_xrefs_references(output)
2036	output.write("  </references>\n")
2037	output.write("  <alpha>\n")
2038	self.serialize_xrefs_alpha(output)
2039	output.write("  </alpha>\n")
2040	output.write("  <constructors>\n")
2041	self.serialize_xrefs_constructors(output)
2042	output.write("  </constructors>\n")
2043	output.write("  <functions>\n")
2044	self.serialize_xrefs_functions(output)
2045	output.write("  </functions>\n")
2046	output.write("  <files>\n")
2047	self.serialize_xrefs_files(output)
2048	output.write("  </files>\n")
2049	output.write("  <index>\n")
2050	self.serialize_xrefs_index(output)
2051	output.write("  </index>\n")
2052
2053    def serialize(self):
2054        filename = "%s-api.xml" % self.name
2055        print "Saving XML description %s" % (filename)
2056        output = open(filename, "w")
2057        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
2058        output.write("<api name='%s'>\n" % self.name)
2059        output.write("  <files>\n")
2060        headers = self.headers.keys()
2061        headers.sort()
2062        for file in headers:
2063            self.serialize_exports(output, file)
2064        output.write("  </files>\n")
2065        output.write("  <symbols>\n")
2066        macros = self.idx.macros.keys()
2067        macros.sort()
2068        for macro in macros:
2069            self.serialize_macro(output, macro)
2070        enums = self.idx.enums.keys()
2071        enums.sort()
2072        for enum in enums:
2073            self.serialize_enum(output, enum)
2074        typedefs = self.idx.typedefs.keys()
2075        typedefs.sort()
2076        for typedef in typedefs:
2077            self.serialize_typedef(output, typedef)
2078        variables = self.idx.variables.keys()
2079        variables.sort()
2080        for variable in variables:
2081            self.serialize_variable(output, variable)
2082        functions = self.idx.functions.keys()
2083        functions.sort()
2084        for function in functions:
2085            self.serialize_function(output, function)
2086        output.write("  </symbols>\n")
2087        output.write("</api>\n")
2088        output.close()
2089
2090        filename = "%s-refs.xml" % self.name
2091        print "Saving XML Cross References %s" % (filename)
2092        output = open(filename, "w")
2093        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
2094        output.write("<apirefs name='%s'>\n" % self.name)
2095        self.serialize_xrefs(output)
2096        output.write("</apirefs>\n")
2097        output.close()
2098
2099
def rebuild():
    """Guess which module is being built and regenerate its XML description.

    The module is picked from the files present relative to the current
    directory: 'parser.c' or '../parser.c' selects libxml2,
    '../libxslt/transform.c' selects libxslt.  The matching docBuilder is
    scanned, analyzed and serialized; when '../libexslt/exslt.c' exists a
    second builder for libexslt is processed the same way.

    Returns the main docBuilder, or None (after printing a message) when
    no known module is found.
    """
    if glob.glob("parser.c"):
        print("Rebuilding API description for libxml2")
        # NOTE(review): the directory list names "." twice; kept as it
        # was -- confirm whether that is intentional.
        builder = docBuilder("libxml2", [".", "."],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../parser.c"):
        print("Rebuilding API description for libxml2")
        builder = docBuilder("libxml2", ["..", "../include/libxml"],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../libxslt/transform.c"):
        print("Rebuilding API description for libxslt")
        builder = docBuilder("libxslt", ["../libxslt"],
                             ["win32config.h", "libxslt.h", "tst.c"])
    else:
        print("rebuild() failed, unable to guess the module")
        return None
    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
2126
2127#
2128# for debugging the parser
2129#
def parse(filename):
    """Run the C parser on one file and return the result of its parse().

    Debugging helper: builds a CParser for `filename` and hands back
    whatever CParser.parse() returns.
    """
    return CParser(filename).parse()
2134
if __name__ == "__main__":
    if len(sys.argv) > 1:
        # With an argument: turn on the module-level debug flag and only
        # run the C parser on the file named by the first argument.
        debug = 1
        parse(sys.argv[1])
    else:
        # Without arguments: regenerate the XML API description.
        rebuild()
2141