1#!/usr/bin/python -u
2#
# This is the API builder; it parses the C sources and builds the
# formal API description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
10import os, sys
11import string
12import glob
13
# When set to a true value, CParser.token() prints every token it returns.
debug=0

#
# C parser analysis code
#

# File names mapped to the reason why they are listed here.
# NOTE(review): the code consulting this table is not in this part of the
# file; presumably the listed files are skipped when scanning -- confirm.
ignored_files = {
  "trio": "too many non standard macros",
  "trio.c": "too many non standard macros",
  "trionan.c": "too many non standard macros",
  "triostr.c": "too many non standard macros",
  "acconfig.h": "generated portability layer",
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "testOOM.c": "out of memory tester",
  "testOOMlib.h": "out of memory tester",
  "testOOMlib.c": "out of memory tester",
}

# Words that CParser.token() silently drops from the token stream.
# Each word maps to (n, description): the word itself and the n tokens
# following it are discarded (presumably n covers a parenthesized
# argument list, e.g. 3 tokens for __declspec -- confirm); description
# is a human readable note which CParser.token() does not use.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XMLCALL": (0, "Special macro for win32 calls"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
  "XSLT_ITEM_COMMON_FIELDS": (0, "Special macro"),
  "CALLBACK": (0, "macro keyword"),
}
51
def escape(raw):
    """Escape the XML special characters of a string.

    The characters & < > ' and " are replaced by the predefined XML
    entity references.  The ampersand is handled first so that the
    ampersands introduced by the other replacements are not escaped
    a second time.

    raw: the string to escape
    Returns the escaped string.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
59
def uniq(items):
    """Return the distinct elements of items.

    The elements must be hashable; the result is the key collection of
    a dictionary, so no particular order is guaranteed.
    """
    seen = {}
    for entry in items:
        seen.setdefault(entry, 1)
    return seen.keys()
65
class identifier:
    """Record describing one named symbol.

    name:   the symbol name
    module: the module (file) it was found in, or None
    type:   the kind of symbol as a string, or None
    lineno: the line number associated to the symbol
    info:   kind specific description, or None
    extra:  additional kind specific data, or None
    static: 0 by default, changed through set_static()
    """
    def __init__(self, name, module=None, type=None, lineno = 0,
                 info=None, extra=None):
        self.name = name
        self.module = module
        self.type = type
        self.lineno = lineno
        self.info = info
        self.extra = extra
        self.static = 0

    def __repr__(self):
        # Build the pieces first and glue them together at the end
        pieces = ["%s %s:" % (self.type, self.name)]
        if self.static:
            pieces.append(" static")
        if self.module is not None:
            pieces.append(" from %s" % (self.module))
        if self.info is not None:
            pieces.append(" " + repr(self.info))
        if self.extra is not None:
            pieces.append(" " + repr(self.extra))
        return "".join(pieces)

    #
    # Plain setters
    #
    def set_module(self, module):
        self.module = module

    def set_type(self, type):
        self.type = type

    def set_info(self, info):
        self.info = info

    def set_extra(self, extra):
        self.extra = extra

    def set_lineno(self, lineno):
        self.lineno = lineno

    def set_static(self, static):
        self.static = static

    #
    # Plain getters
    #
    def get_name(self):
        return self.name

    def get_module(self):
        return self.module

    def get_type(self):
        return self.type

    def get_info(self):
        return self.info

    def get_lineno(self):
        return self.lineno

    def get_extra(self):
        return self.extra

    def get_static(self):
        return self.static

    def update(self, module, type = None, info = None, extra=None):
        """Complete the record with new data.

        module and type are only stored when the record has none yet;
        info and extra overwrite the stored values whenever they are
        not None.
        """
        if self.module is None and module is not None:
            self.set_module(module)
        if self.type is None and type is not None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
127
128
class index:
    """Registry of the identifiers collected while parsing.

    identifiers maps every registered name to its identifier record.
    functions, variables, includes, structs, enums, typedefs and macros
    hold the records registered through add() under the matching type,
    references holds the records registered through add_ref(), and
    info is a free-form dictionary describing the indexed module.
    """
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _new_record(self, name, module, static, type, lineno, info, extra):
        """Create the identifier record for name and store it in
        self.identifiers, replacing any record stored under that name."""
        #
        # The previous code tried to update an existing record in place
        # but passed one argument too many to identifier.update(); the
        # resulting TypeError was swallowed by a bare except clause and
        # a fresh record was created every time. That effective
        # behaviour (the latest declaration wins) is kept, without
        # hiding unrelated errors anymore.
        #
        d = identifier(name, module, type, lineno, info, extra)
        self.identifiers[name] = d
        if static == 1:
            d.set_static(1)
        return d

    def add_ref(self, name, module, static, type, lineno, info=None, extra=None):
        """Record a reference to the symbol name.

        Names starting with '__' are ignored. When type is not None the
        record is also stored in self.references. Returns None.
        """
        if name[0:2] == '__':
            return None
        d = self._new_record(name, module, static, type, lineno, info, extra)
        if name is not None and type is not None:
            self.references[name] = d

    def add(self, name, module, static, type, lineno, info=None, extra=None):
        """Register the definition of the symbol name.

        Names starting with '__' are ignored and None is returned.
        Otherwise the new record is returned after being stored in the
        dictionary matching type ("functype" records go to functions);
        an unknown type is reported on the standard output.
        """
        if name[0:2] == '__':
            return None
        d = self._new_record(name, module, static, type, lineno, info, extra)
        if name is not None and type is not None:
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            if type in tables:
                tables[type][name] = d
            else:
                print("Unable to register type  %s" % (type,))
        return d

    def _merge_kind(self, kind, mine, theirs, overrides_macros = 0):
        """Copy the records of theirs missing from mine, the entries
        already present are reported as redeclared and left untouched.
        When overrides_macros is set, a macro of self with the same
        name is dropped first."""
        for name in theirs.keys():
            if overrides_macros and name in self.macros:
                del self.macros[name]
            if name in mine:
                print("%s %s from %s redeclared in %s" % (
                    kind, name, mine[name].module, theirs[name].module))
            else:
                mine[name] = theirs[name]
                self.identifiers[name] = theirs[name]

    def merge(self, idx):
        """Merge the functions, variables, structs, typedefs, macros and
        enums of the index idx into this one."""
        #
        # macro might be used to override functions or variables
        # definitions
        #
        self._merge_kind("function", self.functions, idx.functions, 1)
        self._merge_kind("variable", self.variables, idx.variables, 1)
        self._merge_kind("struct", self.structs, idx.structs)
        self._merge_kind("typedef", self.typedefs, idx.typedefs)
        for name in idx.macros.keys():
            #
            # a macro never hides a variable, function or enum already
            # known by this index
            #
            if name in self.variables or name in self.functions or \
               name in self.enums:
                continue
            if name in self.macros:
                print("macro %s from %s redeclared in %s" % (
                    name, self.macros[name].module, idx.macros[name].module))
            else:
                self.macros[name] = idx.macros[name]
                self.identifiers[name] = idx.macros[name]
        self._merge_kind("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Complete the functions already known by this index with the
        type, info and extra data of the same functions in idx."""
        for name in idx.functions.keys():
            if name in self.functions:
                up = idx.functions[name]
                self.functions[name].update(None, up.type, up.info, up.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many records dict holds and how many of them are
        public (static == 0); type is the label used in the message."""
        count = len(dict)
        public = 0
        for name in dict.keys():
            if dict[name].static == 0:
                public = public + 1
        if count != public:
            print("  %d %s , %d public" % (count, type, public))
        elif count != 0:
            print("  %d public %s" % (count, type))

    def analyze(self):
        """Print the statistics for functions, variables, structs,
        typedefs and macros."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
291
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

       Tokens are (kind, text) tuples where kind is one of 'preproc',
       'comment', 'string', 'name', 'sep' or 'op'. input is any object
       providing readline()."""

    # characters ending a 'name' token
    _name_stops = " \t(){}:;,+-*/%&!|[]=><"
    # characters each forming a 'sep' token
    _separators = "(){}:;,[]"
    # characters starting an 'op' token
    _operators = "+-*><=/%&!|."
    # characters accepted as second character of an 'op' token
    _operator_tails = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # last token returned, debug() needs it even before the first
        # call to token()
        self.last = None

    def getline(self):
        """Return the next non blank line, stripped of its leading and
        trailing blanks, or None at the end of the input. Lines ending
        with a backslash are joined with the following line."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        """Return the number of lines read so far."""
        return self.lineno

    def push(self, token):
        """Push back a token, it will be the next one returned."""
        self.tokens.insert(0, token)

    def debug(self):
        """Print the state of the lexer on the standard output."""
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def _scan_string(self, line):
        """Read a string or character constant starting at line[0],
        fetching more lines until the closing quote is found. Returns
        the 'string' token (without the quotes) or None if the input
        ends first."""
        end = line[0]
        line = line[1:]
        tok = ""
        while 1:
            found = 0
            i = 0
            l = len(line)
            while i < l:
                if line[i] == end:
                    self.line = line[i+1:]
                    line = line[:i]
                    found = 1
                    break
                if line[i] == '\\':
                    # skip the escaped character
                    i = i + 1
                i = i + 1
            tok = tok + line
            if found:
                break
            line = self.getline()
            if line is None:
                return None
        self.last = ('string', tok)
        return self.last

    def _scan_comment(self, line):
        """Read a /* */ comment starting at line[0], fetching more lines
        until the terminator is found; the lines are joined with a
        newline. Returns the 'comment' token or None if the input ends
        first."""
        line = line[2:]
        tok = ""
        while 1:
            end = line.find('*/')
            if end != -1:
                self.line = line[end+2:]
                # NOTE: the character preceding the terminator is
                # dropped too, and when the terminator starts the line
                # the slice is line[:-1], leaving a single '*'. The
                # comment parsers cope with that layout, so it is kept
                # as is.
                line = line[:end-1]
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if end != -1:
                break
            line = self.getline()
            if line is None:
                return None
        self.last = ('comment', tok)
        return self.last

    def _cut_before_special(self, line):
        """Return the part of line preceding the first comment or
        string start, the remainder is saved in self.line."""
        i = 0
        l = len(line)
        while i < l:
            c = line[i]
            if c == '"' or c == "'" or \
               (c == '/' and i + 1 < l and line[i+1] in '/*'):
                self.line = line[i:]
                return line[:i]
            i = i + 1
        return line

    def _tokenize(self, line):
        """Split a piece of code free of comments and strings and
        append the resulting tokens to the queue."""
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i = i + 1
                continue
            if c in self._separators:
                self.tokens.append(('sep', c))
                i = i + 1
                continue
            if c in self._operators:
                if line[i:i+3] == '...':
                    self.tokens.append(('name', '...'))
                    i = i + 3
                    continue
                j = i + 1
                if j < l and line[j] in self._operator_tails:
                    self.tokens.append(('op', line[i:j+1]))
                    i = j + 1
                else:
                    self.tokens.append(('op', c))
                    i = i + 1
                continue
            # anything else starts a name running up to the next
            # blank, separator or operator character
            s = i
            while i < l and line[i] not in self._name_stops:
                i = i + 1
            self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token or None at the end of the input."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # a preprocessor line is split on blanks, every piece
                # becoming a 'preproc' token
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._scan_string(line)
            if line[0:2] == '/*':
                return self._scan_comment(line)
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._tokenize(self._cut_before_special(line))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
497
498class CParser:
499    """The C module parser"""
500    def __init__(self, filename, idx = None):
501        self.filename = filename
502	if len(filename) > 2 and filename[-2:] == '.h':
503	    self.is_header = 1
504	else:
505	    self.is_header = 0
506        self.input = open(filename)
507	self.lexer = CLexer(self.input)
508	if idx == None:
509	    self.index = index()
510	else:
511	    self.index = idx
512	self.top_comment = ""
513	self.last_comment = ""
514	self.comment = None
515	self.collect_ref = 0
516	self.no_error = 0
517
518    def collect_references(self):
519        self.collect_ref = 1
520
521    def stop_error(self):
522        self.no_error = 1
523
524    def start_error(self):
525        self.no_error = 0
526
527    def lineno(self):
528        return self.lexer.getlineno()
529
530    def index_add(self, name, module, static, type, info=None, extra = None):
531        self.index.add(name, module, static, type, self.lineno(),
532	               info, extra)
533
534    def index_add_ref(self, name, module, static, type, info=None,
535                      extra = None):
536        self.index.add_ref(name, module, static, type, self.lineno(),
537	               info, extra)
538
539    def warning(self, msg):
540        if self.no_error:
541	    return
542	print msg
543
544    def error(self, msg, token=-1):
545        if self.no_error:
546	    return
547
548        print "Parse Error: " + msg
549	if token != -1:
550	    print "Got token ", token
551	self.lexer.debug()
552	sys.exit(1)
553
554    def debug(self, msg, token=-1):
555        print "Debug: " + msg
556	if token != -1:
557	    print "Got token ", token
558	self.lexer.debug()
559
560    def parseTopComment(self, comment):
561	res = {}
562	lines = string.split(comment, "\n")
563	item = None
564	for line in lines:
565	    while line != "" and (line[0] == ' ' or line[0] == '\t'):
566		line = line[1:]
567	    while line != "" and line[0] == '*':
568		line = line[1:]
569	    while line != "" and (line[0] == ' ' or line[0] == '\t'):
570		line = line[1:]
571	    try:
572		(it, line) = string.split(line, ":", 1)
573		item = it
574		while line != "" and (line[0] == ' ' or line[0] == '\t'):
575		    line = line[1:]
576		if res.has_key(item):
577		    res[item] = res[item] + " " + line
578		else:
579		    res[item] = line
580	    except:
581		if item != None:
582		    if res.has_key(item):
583			res[item] = res[item] + " " + line
584		    else:
585			res[item] = line
586	self.index.info = res
587
588    def parseComment(self, token):
589        if self.top_comment == "":
590	    self.top_comment = token[1]
591	if self.comment == None or token[1][0] == '*':
592	    self.comment = token[1];
593	else:
594	    self.comment = self.comment + token[1]
595	token = self.lexer.token()
596
597        if string.find(self.comment, "DOC_DISABLE") != -1:
598	    self.stop_error()
599
600        if string.find(self.comment, "DOC_ENABLE") != -1:
601	    self.start_error()
602
603	return token
604
605     #
     # Parse a comment block associated with a macro
607     #
608    def parseMacroComment(self, name, quiet = 0):
609        if name[0:2] == '__':
610	    quiet = 1
611
612        args = []
613	desc = ""
614
615        if self.comment == None:
616	    if not quiet:
617		self.warning("Missing comment for macro %s" % (name))
618	    return((args, desc))
619        if self.comment[0] != '*':
620	    if not quiet:
621		self.warning("Missing * in macro comment for %s" % (name))
622	    return((args, desc))
623	lines = string.split(self.comment, '\n')
624	if lines[0] == '*':
625	    del lines[0]
626	if lines[0] != "* %s:" % (name):
627	    if not quiet:
628		self.warning("Misformatted macro comment for %s" % (name))
629		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
630	    return((args, desc))
631	del lines[0]
632	while lines[0] == '*':
633	    del lines[0]
634	while len(lines) > 0 and lines[0][0:3] == '* @':
635	    l = lines[0][3:]
636	    try:
637	        (arg, desc) = string.split(l, ':', 1)
638		desc=string.strip(desc)
639		arg=string.strip(arg)
640            except:
641		if not quiet:
642		    self.warning("Misformatted macro comment for %s" % (name))
643		    self.warning("  problem with '%s'" % (lines[0]))
644		del lines[0]
645		continue
646	    del lines[0]
647	    l = string.strip(lines[0])
648	    while len(l) > 2 and l[0:3] != '* @':
649	        while l[0] == '*':
650		    l = l[1:]
651		desc = desc + ' ' + string.strip(l)
652		del lines[0]
653		if len(lines) == 0:
654		    break
655		l = lines[0]
656            args.append((arg, desc))
657	while len(lines) > 0 and lines[0] == '*':
658	    del lines[0]
659	desc = ""
660	while len(lines) > 0:
661	    l = lines[0]
662	    while len(l) > 0 and l[0] == '*':
663	        l = l[1:]
664	    l = string.strip(l)
665	    desc = desc + " " + l
666	    del lines[0]
667
668	desc = string.strip(desc)
669
670	if quiet == 0:
671	    if desc == "":
672	        self.warning("Macro comment for %s lack description of the macro" % (name))
673
674	return((args, desc))
675
676     #
     # Parse a comment block and merge the information found in the
     # parameter descriptions; finally return a block as complete
     # as possible
680     #
681    def mergeFunctionComment(self, name, description, quiet = 0):
682        if name == 'main':
683	    quiet = 1
684        if name[0:2] == '__':
685	    quiet = 1
686
687	(ret, args) = description
688	desc = ""
689	retdesc = ""
690
691        if self.comment == None:
692	    if not quiet:
693		self.warning("Missing comment for function %s" % (name))
694	    return(((ret[0], retdesc), args, desc))
695        if self.comment[0] != '*':
696	    if not quiet:
697		self.warning("Missing * in function comment for %s" % (name))
698	    return(((ret[0], retdesc), args, desc))
699	lines = string.split(self.comment, '\n')
700	if lines[0] == '*':
701	    del lines[0]
702	if lines[0] != "* %s:" % (name):
703	    if not quiet:
704		self.warning("Misformatted function comment for %s" % (name))
705		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
706	    return(((ret[0], retdesc), args, desc))
707	del lines[0]
708	while len(lines) > 0 and lines[0] == '*':
709	    del lines[0]
710	nbargs = len(args)
711	while len(lines) > 0 and lines[0][0:3] == '* @':
712	    l = lines[0][3:]
713	    try:
714	        (arg, desc) = string.split(l, ':', 1)
715		desc=string.strip(desc)
716		arg=string.strip(arg)
717            except:
718		if not quiet:
719		    self.warning("Misformatted function comment for %s" % (name))
720		    self.warning("  problem with '%s'" % (lines[0]))
721		del lines[0]
722		continue
723	    del lines[0]
724	    l = string.strip(lines[0])
725	    while len(l) > 2 and l[0:3] != '* @':
726	        while l[0] == '*':
727		    l = l[1:]
728		desc = desc + ' ' + string.strip(l)
729		del lines[0]
730		if len(lines) == 0:
731		    break
732		l = lines[0]
733	    i = 0
734	    while i < nbargs:
735	        if args[i][1] == arg:
736		    args[i] = (args[i][0], arg, desc)
737		    break;
738		i = i + 1
739	    if i >= nbargs:
740		if not quiet:
741		    self.warning("Unable to find arg %s from function comment for %s" % (
742		       arg, name))
743	while len(lines) > 0 and lines[0] == '*':
744	    del lines[0]
745	desc = ""
746	while len(lines) > 0:
747	    l = lines[0]
748	    while len(l) > 0 and l[0] == '*':
749	        l = l[1:]
750	    l = string.strip(l)
751	    if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
752	        try:
753		    l = string.split(l, ' ', 1)[1]
754		except:
755		    l = ""
756		retdesc = string.strip(l)
757		del lines[0]
758		while len(lines) > 0:
759		    l = lines[0]
760		    while len(l) > 0 and l[0] == '*':
761			l = l[1:]
762		    l = string.strip(l)
763		    retdesc = retdesc + " " + l
764		    del lines[0]
765	    else:
766	        desc = desc + " " + l
767		del lines[0]
768
769	retdesc = string.strip(retdesc)
770	desc = string.strip(desc)
771
772	if quiet == 0:
773	     #
774	     # report missing comments
775	     #
776	    i = 0
777	    while i < nbargs:
778	        if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
779		    self.warning("Function comment for %s lack description of arg %s" % (name, args[i][1]))
780		i = i + 1
781	    if retdesc == "" and ret[0] != "void":
782		self.warning("Function comment for %s lack description of return value" % (name))
783	    if desc == "":
784	        self.warning("Function comment for %s lack description of the function" % (name))
785
786
787	return(((ret[0], retdesc), args, desc))
788
789    def parsePreproc(self, token):
790        name = token[1]
791	if name == "#include":
792	    token = self.lexer.token()
793	    if token == None:
794	        return None
795	    if token[0] == 'preproc':
796		self.index_add(token[1], self.filename, not self.is_header,
797		                "include")
798		return self.lexer.token()
799	    return token
800	if name == "#define":
801	    token = self.lexer.token()
802	    if token == None:
803	        return None
804	    if token[0] == 'preproc':
805	         # TODO macros with arguments
806		name = token[1]
807	        lst = []
808		token = self.lexer.token()
809		while token != None and token[0] == 'preproc' and \
810		      token[1][0] != '#':
811		    lst.append(token[1])
812		    token = self.lexer.token()
813                try:
814		    name = string.split(name, '(') [0]
815                except:
816                    pass
817                info = self.parseMacroComment(name, not self.is_header)
818		self.index_add(name, self.filename, not self.is_header,
819		                "macro", info)
820		return token
821	token = self.lexer.token()
822	while token != None and token[0] == 'preproc' and \
823	    token[1][0] != '#':
824	    token = self.lexer.token()
825	return token
826
827     #
     # Token acquisition on top of the lexer; it handles preprocessor
     # directives and comments internally since they are logically not
     # part of the program structure.
831     #
832    def token(self):
833        global ignored_words
834
835        token = self.lexer.token()
836	while token != None:
837	    if token[0] == 'comment':
838		token = self.parseComment(token)
839		continue
840	    elif token[0] == 'preproc':
841		token = self.parsePreproc(token)
842		continue
843	    elif token[0] == "name" and ignored_words.has_key(token[1]):
844	        (n, info) = ignored_words[token[1]]
845		i = 0
846		while i < n:
847		    token = self.lexer.token()
848		    i = i + 1
849		token = self.lexer.token()
850		continue
851	    else:
852	        if debug:
853		    print "=> ", token
854	        return token
855	return None
856
857     #
858     # Parse a typedef, it records the type and its name.
859     #
860    def parseTypedef(self, token):
861        if token == None:
862	    return None
863	token = self.parseType(token)
864	if token == None:
865	    self.error("parsing typedef")
866	    return None
867	base_type = self.type
868	type = base_type
869	 #self.debug("end typedef type", token)
870	while token != None:
871	    if token[0] == "name":
872		name = token[1]
873		signature = self.signature
874		if signature != None:
875		    type = string.split(type, '(')[0]
876		    d = self.mergeFunctionComment(name,
877			    ((type, None), signature), 1)
878		    self.index_add(name, self.filename, not self.is_header,
879				    "functype", d)
880		else:
881		    if base_type == "struct":
882			self.index_add(name, self.filename, not self.is_header,
883					"struct", type)
884			base_type = "struct " + name
885	            else:
886			self.index_add(name, self.filename, not self.is_header,
887		                    "typedef", type)
888		token = self.token()
889	    else:
890		self.error("parsing typedef: expecting a name")
891		return token
892	     #self.debug("end typedef", token)
893	    if token != None and token[0] == 'sep' and token[1] == ',':
894	        type = base_type
895	        token = self.token()
896		while token != None and token[0] == "op":
897		    type = type + token[1]
898		    token = self.token()
899	    elif token != None and token[0] == 'sep' and token[1] == ';':
900	        break;
901	    elif token != None and token[0] == 'name':
902	        type = base_type
903	        continue;
904	    else:
905		self.error("parsing typedef: expecting ';'", token)
906		return token
907	token = self.token()
908	return token
909
910     #
911     # Parse a C code block, used for functions it parse till
912     # the balancing } included
913     #
914    def parseBlock(self, token):
915        while token != None:
916	    if token[0] == "sep" and token[1] == "{":
917	        token = self.token()
918		token = self.parseBlock(token)
919	    elif token[0] == "sep" and token[1] == "}":
920	        self.comment = None
921	        token = self.token()
922		return token
923	    else:
924	        if self.collect_ref == 1:
925		    oldtok = token
926		    token = self.token()
927		    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
928		        if token[0] == "sep" and token[1] == "(":
929			    self.index_add_ref(oldtok[1], self.filename,
930			                        0, "function")
931			    token = self.token()
932			elif token[0] == "name":
933			    token = self.token()
934			    if token[0] == "sep" and (token[1] == ";" or
935			       token[1] == "," or token[1] == "="):
936				self.index_add_ref(oldtok[1], self.filename,
937						    0, "type")
938		    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
939			self.index_add_ref(oldtok[1], self.filename,
940					    0, "typedef")
941		    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
942			self.index_add_ref(oldtok[1], self.filename,
943					    0, "typedef")
944
945		else:
946		    token = self.token()
947	return token
948
949     #
950     # Parse a C struct definition till the balancing }
951     #
952    def parseStruct(self, token):
953        fields = []
954	 #self.debug("start parseStruct", token)
955        while token != None:
956	    if token[0] == "sep" and token[1] == "{":
957	        token = self.token()
958		token = self.parseTypeBlock(token)
959	    elif token[0] == "sep" and token[1] == "}":
960		self.struct_fields = fields
961		 #self.debug("end parseStruct", token)
962		 #print fields
963	        token = self.token()
964		return token
965	    else:
966	        base_type = self.type
967		 #self.debug("before parseType", token)
968		token = self.parseType(token)
969		 #self.debug("after parseType", token)
970		if token != None and token[0] == "name":
971		    fname = token[1]
972		    token = self.token()
973		    if token[0] == "sep" and token[1] == ";":
974		        self.comment = None
975		        token = self.token()
976			fields.append((self.type, fname, self.comment))
977			self.comment = None
978		    else:
979		        self.error("parseStruct: expecting ;", token)
980		elif token != None and token[0] == "sep" and token[1] == "{":
981		    token = self.token()
982		    token = self.parseTypeBlock(token)
983		    if token != None and token[0] == "name":
984			token = self.token()
985		    if token != None and token[0] == "sep" and token[1] == ";":
986			token = self.token()
987		    else:
988		        self.error("parseStruct: expecting ;", token)
989		else:
990		    self.error("parseStruct: name", token)
991		    token = self.token()
992		self.type = base_type;
993        self.struct_fields = fields
994	 #self.debug("end parseStruct", token)
995	 #print fields
996	return token
997
998     #
999     # Parse a C enum block, parse till the balancing }
1000     #
1001    def parseEnumBlock(self, token):
1002        self.enums = []
1003	name = None
1004	self.comment = None
1005	comment = ""
1006	value = "0"
1007        while token != None:
1008	    if token[0] == "sep" and token[1] == "{":
1009	        token = self.token()
1010		token = self.parseTypeBlock(token)
1011	    elif token[0] == "sep" and token[1] == "}":
1012		if name != None:
1013		    if self.comment != None:
1014			comment = self.comment
1015			self.comment = None
1016		    self.enums.append((name, value, comment))
1017	        token = self.token()
1018		return token
1019	    elif token[0] == "name":
1020		    if name != None:
1021			if self.comment != None:
1022			    comment = string.strip(self.comment)
1023			    self.comment = None
1024			self.enums.append((name, value, comment))
1025		    name = token[1]
1026		    comment = ""
1027		    token = self.token()
1028		    if token[0] == "op" and token[1][0] == "=":
1029		        value = ""
1030		        if len(token[1]) > 1:
1031			    value = token[1][1:]
1032		        token = self.token()
1033		        while token[0] != "sep" or (token[1] != ',' and
1034			      token[1] != '}'):
1035			    value = value + token[1]
1036			    token = self.token()
1037		    else:
1038		        try:
1039			    value = "%d" % (int(value) + 1)
1040			except:
1041			    self.warning("Failed to compute value of enum %s" % (name))
1042			    value=""
1043		    if token[0] == "sep" and token[1] == ",":
1044			token = self.token()
1045	    else:
1046	        token = self.token()
1047	return token
1048
1049     #
1050     # Parse a C definition block, used for structs it parse till
1051     # the balancing }
1052     #
1053    def parseTypeBlock(self, token):
1054        while token != None:
1055	    if token[0] == "sep" and token[1] == "{":
1056	        token = self.token()
1057		token = self.parseTypeBlock(token)
1058	    elif token[0] == "sep" and token[1] == "}":
1059	        token = self.token()
1060		return token
1061	    else:
1062	        token = self.token()
1063	return token
1064
1065     #
1066     # Parse a type: the fact that the type name can either occur after
1067     #    the definition or within the definition makes it a little harder
1068     #    if inside, the name token is pushed back before returning
1069     #
1070    def parseType(self, token):
1071        self.type = ""
1072	self.struct_fields = []
1073        self.signature = None
1074	if token == None:
1075	    return token
1076
1077	while token[0] == "name" and (
1078	      token[1] == "const" or token[1] == "unsigned" or
1079	      token[1] == "signed"):
1080	    if self.type == "":
1081	        self.type = token[1]
1082	    else:
1083	        self.type = self.type + " " + token[1]
1084	    token = self.token()
1085
1086        if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1087	    if self.type == "":
1088	        self.type = token[1]
1089	    else:
1090	        self.type = self.type + " " + token[1]
1091	    if token[0] == "name" and token[1] == "int":
1092		if self.type == "":
1093		    self.type = tmp[1]
1094		else:
1095		    self.type = self.type + " " + tmp[1]
1096
1097        elif token[0] == "name" and token[1] == "struct":
1098	    if self.type == "":
1099	        self.type = token[1]
1100	    else:
1101	        self.type = self.type + " " + token[1]
1102	    token = self.token()
1103	    nametok = None
1104	    if token[0] == "name":
1105	        nametok = token
1106		token = self.token()
1107	    if token != None and token[0] == "sep" and token[1] == "{":
1108		token = self.token()
1109		token = self.parseStruct(token)
1110	    elif token != None and token[0] == "op" and token[1] == "*":
1111	        self.type = self.type + " " + nametok[1] + " *"
1112		token = self.token()
1113		while token != None and token[0] == "op" and token[1] == "*":
1114		    self.type = self.type + " *"
1115		    token = self.token()
1116		if token[0] == "name":
1117		    nametok = token
1118		    token = self.token()
1119		else:
1120		    self.error("struct : expecting name", token)
1121		    return token
1122	    elif token != None and token[0] == "name" and nametok != None:
1123	        self.type = self.type + " " + nametok[1]
1124		return token
1125
1126	    if nametok != None:
1127		self.lexer.push(token)
1128		token = nametok
1129	    return token
1130
1131        elif token[0] == "name" and token[1] == "enum":
1132	    if self.type == "":
1133	        self.type = token[1]
1134	    else:
1135	        self.type = self.type + " " + token[1]
1136	    self.enums = []
1137	    token = self.token()
1138	    if token != None and token[0] == "sep" and token[1] == "{":
1139		token = self.token()
1140		token = self.parseEnumBlock(token)
1141	    else:
1142		self.error("parsing enum: expecting '{'", token)
1143	    enum_type = None
1144	    if token != None and token[0] != "name":
1145	        self.lexer.push(token)
1146	        token = ("name", "enum")
1147	    else:
1148	        enum_type = token[1]
1149	    for enum in self.enums:
1150		self.index_add(enum[0], self.filename,
1151			       not self.is_header, "enum",
1152			       (enum[1], enum[2], enum_type))
1153	    return token
1154
1155	elif token[0] == "name":
1156	    if self.type == "":
1157	        self.type = token[1]
1158	    else:
1159	        self.type = self.type + " " + token[1]
1160	else:
1161	    self.error("parsing type %s: expecting a name" % (self.type),
1162	               token)
1163	    return token
1164	token = self.token()
1165        while token != None and (token[0] == "op" or
1166	      token[0] == "name" and token[1] == "const"):
1167	    self.type = self.type + " " + token[1]
1168	    token = self.token()
1169
1170	 #
1171	 # if there is a parenthesis here, this means a function type
1172	 #
1173	if token != None and token[0] == "sep" and token[1] == '(':
1174	    self.type = self.type + token[1]
1175	    token = self.token()
1176	    while token != None and token[0] == "op" and token[1] == '*':
1177	        self.type = self.type + token[1]
1178		token = self.token()
1179	    if token == None or token[0] != "name" :
1180		self.error("parsing function type, name expected", token);
1181	        return token
1182	    self.type = self.type + token[1]
1183	    nametok = token
1184	    token = self.token()
1185	    if token != None and token[0] == "sep" and token[1] == ')':
1186		self.type = self.type + token[1]
1187		token = self.token()
1188		if token != None and token[0] == "sep" and token[1] == '(':
1189		    token = self.token()
1190		    type = self.type;
1191		    token = self.parseSignature(token);
1192		    self.type = type;
1193		else:
1194		    self.error("parsing function type, '(' expected", token);
1195		    return token
1196	    else:
1197	        self.error("parsing function type, ')' expected", token);
1198		return token
1199	    self.lexer.push(token)
1200	    token = nametok
1201	    return token
1202
1203         #
1204	 # do some lookahead for arrays
1205	 #
1206	if token != None and token[0] == "name":
1207	    nametok = token
1208	    token = self.token()
1209	    if token != None and token[0] == "sep" and token[1] == '[':
1210	        self.type = self.type + nametok[1]
1211		while token != None and token[0] == "sep" and token[1] == '[':
1212		    self.type = self.type + token[1]
1213		    token = self.token()
1214		    while token != None and token[0] != 'sep' and \
1215		          token[1] != ']' and token[1] != ';':
1216			self.type = self.type + token[1]
1217			token = self.token()
1218		if token != None and token[0] == 'sep' and token[1] == ']':
1219		    self.type = self.type + token[1]
1220		    token = self.token()
1221		else:
1222		    self.error("parsing array type, ']' expected", token);
1223		    return token
1224	    elif token != None and token[0] == "sep" and token[1] == ':':
1225	         # remove :12 in case it's a limited int size
1226		token = self.token()
1227		token = self.token()
1228	    self.lexer.push(token)
1229	    token = nametok
1230
1231	return token
1232
1233     #
1234     # Parse a signature: '(' has been parsed and we scan the type definition
1235     #    up to the ')' included
1236    def parseSignature(self, token):
1237        signature = []
1238	if token != None and token[0] == "sep" and token[1] == ')':
1239	    self.signature = []
1240	    token = self.token()
1241	    return token
1242	while token != None:
1243	    token = self.parseType(token)
1244	    if token != None and token[0] == "name":
1245	        signature.append((self.type, token[1], None))
1246		token = self.token()
1247	    elif token != None and token[0] == "sep" and token[1] == ',':
1248		token = self.token()
1249		continue
1250	    elif token != None and token[0] == "sep" and token[1] == ')':
1251	         # only the type was provided
1252		if self.type == "...":
1253		    signature.append((self.type, "...", None))
1254		else:
1255		    signature.append((self.type, None, None))
1256	    if token != None and token[0] == "sep":
1257	        if token[1] == ',':
1258		    token = self.token()
1259		    continue
1260		elif token[1] == ')':
1261		    token = self.token()
1262		    break
1263	self.signature = signature
1264	return token
1265
1266     #
1267     # Parse a global definition, be it a type, variable or function
1268     # the extern "C" blocks are a bit nasty and require it to recurse.
1269     #
1270    def parseGlobal(self, token):
1271        static = 0
1272        if token[1] == 'extern':
1273	    token = self.token()
1274	    if token == None:
1275	        return token
1276	    if token[0] == 'string':
1277	        if token[1] == 'C':
1278		    token = self.token()
1279		    if token == None:
1280			return token
1281		    if token[0] == 'sep' and token[1] == "{":
1282		        token = self.token()
1283#			 print 'Entering extern "C line ', self.lineno()
1284			while token != None and (token[0] != 'sep' or
1285			      token[1] != "}"):
1286			    if token[0] == 'name':
1287				token = self.parseGlobal(token)
1288			    else:
1289				self.error(
1290				 "token %s %s unexpected at the top level" % (
1291					token[0], token[1]))
1292				token = self.parseGlobal(token)
1293#			 print 'Exiting extern "C" line', self.lineno()
1294			token = self.token()
1295			return token
1296		else:
1297		    return token
1298	elif token[1] == 'static':
1299	    static = 1
1300	    token = self.token()
1301	    if token == None or  token[0] != 'name':
1302	        return token
1303
1304	if token[1] == 'typedef':
1305	    token = self.token()
1306	    return self.parseTypedef(token)
1307	else:
1308	    token = self.parseType(token)
1309	    type_orig = self.type
1310	if token == None or token[0] != "name":
1311	    return token
1312	type = type_orig
1313	self.name = token[1]
1314	token = self.token()
1315	while token != None and (token[0] == "sep" or token[0] == "op"):
1316	    if token[0] == "sep":
1317		if token[1] == "[":
1318		    type = type + token[1]
1319		    token = self.token()
1320		    while token != None and (token[0] != "sep" or \
1321		          token[1] != ";"):
1322			type = type + token[1]
1323			token = self.token()
1324
1325	    if token != None and token[0] == "op" and token[1] == "=":
1326		 #
1327		 # Skip the initialization of the variable
1328		 #
1329		token = self.token()
1330		if token[0] == 'sep' and token[1] == '{':
1331		    token = self.token()
1332		    token = self.parseBlock(token)
1333		else:
1334		    self.comment = None
1335		    while token != None and (token[0] != "sep" or \
1336			  (token[1] != ';' and token[1] != ',')):
1337			    token = self.token()
1338		self.comment = None
1339		if token == None or token[0] != "sep" or (token[1] != ';' and
1340		   token[1] != ','):
1341		    self.error("missing ';' or ',' after value")
1342
1343	    if token != None and token[0] == "sep":
1344		if token[1] == ";":
1345		    self.comment = None
1346		    token = self.token()
1347		    if type == "struct":
1348		        self.index_add(self.name, self.filename,
1349			     not self.is_header, "struct", self.struct_fields)
1350		    else:
1351			self.index_add(self.name, self.filename,
1352			     not self.is_header, "variable", type)
1353		    break
1354		elif token[1] == "(":
1355		    token = self.token()
1356		    token = self.parseSignature(token)
1357		    if token == None:
1358			return None
1359		    if token[0] == "sep" and token[1] == ";":
1360		        d = self.mergeFunctionComment(self.name,
1361				((type, None), self.signature), 1)
1362			self.index_add(self.name, self.filename, static,
1363			                "function", d)
1364			token = self.token()
1365		    elif token[0] == "sep" and token[1] == "{":
1366		        d = self.mergeFunctionComment(self.name,
1367				((type, None), self.signature), static)
1368			self.index_add(self.name, self.filename, static,
1369			                "function", d)
1370			token = self.token()
1371			token = self.parseBlock(token);
1372		elif token[1] == ',':
1373		    self.comment = None
1374		    self.index_add(self.name, self.filename, static,
1375		                    "variable", type)
1376		    type = type_orig
1377		    token = self.token()
1378		    while token != None and token[0] == "sep":
1379		        type = type + token[1]
1380			token = self.token()
1381		    if token != None and token[0] == "name":
1382		        self.name = token[1]
1383			token = self.token()
1384		else:
1385		    break
1386
1387	return token
1388
1389    def parse(self):
1390        self.warning("Parsing %s" % (self.filename))
1391        token = self.token()
1392	while token != None:
1393            if token[0] == 'name':
1394	        token = self.parseGlobal(token)
1395            else:
1396	        self.error("token %s %s unexpected at the top level" % (
1397		       token[0], token[1]))
1398		token = self.parseGlobal(token)
1399		return
1400	self.parseTopComment(self.top_comment)
1401        return self.index
1402
1403
class docBuilder:
    """A documentation builder.

    Collects the .c and .h files of a set of directories, parses each of
    them with CParser, merges the resulting indexes and serializes the
    outcome as two XML files: <name>-api.xml and <name>-refs.xml.
    """
    def __init__(self, name, directories=None, excludes=None):
        """Set up a builder for project `name`.

        directories -- directories to scan, ['.'] when omitted
        excludes    -- substrings of file names to skip, in addition to
                       the keys of the module-level ignored_files table
        """
        # None stands for the former list defaults so that no list object
        # is shared between instances.
        if directories is None:
            directories = ['.']
        if excludes is None:
            excludes = []
        self.name = name
        self.directories = directories
        self.excludes = list(excludes) + list(ignored_files.keys())
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.xref = {}
        self.index = {}
        if name == 'libxml2':
            self.basename = 'libxml'
        else:
            self.basename = name

    def indexString(self, id, str):
        """Record `id` in self.xref under every significant word of `str`.

        Punctuation is turned into spaces first; words that do not start
        with an ASCII letter, are shorter than 3 characters, or are 'and'
        or 'the' (in any case) are ignored.  A None `str` is a no-op.
        """
        if str is None:
            return
        for char in "'\"/*[]()<>&#,.;":
            str = str.replace(char, ' ')
        for word in str.split():
            if word[0] not in string.ascii_letters:
                continue
            if len(word) < 3:
                continue
            lower = word.lower()
            # TODO: generalize this a bit
            if lower == 'and' or lower == 'the':
                continue
            self.xref.setdefault(word, []).append(id)

    def analyze(self):
        """Print a short summary and run the analysis of the merged index."""
        print("Project %s : %d headers, %d modules" % (
              self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge its index."""
        for header in list(self.headers.keys()):
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered C module and merge its public index."""
        for module in list(self.modules.keys()):
            parser = CParser(module)
            idx = parser.parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _is_excluded(self, path):
        """Tell whether `path` contains one of the excluded substrings."""
        for excl in self.excludes:
            if path.find(excl) != -1:
                return 1
        return 0

    def _register(self, pattern, table):
        """Add the non-excluded files matching glob `pattern` to `table`."""
        for path in glob.glob(pattern):
            if not self._is_excluded(path):
                table[path] = None

    def scan(self):
        """Collect the .c and .h files of all directories, then parse them."""
        for directory in self.directories:
            self._register(directory + "/*.c", self.modules)
            self._register(directory + "/*.h", self.headers)
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of `file`, without a trailing '.h'."""
        module = os.path.basename(file)
        if module[-2:] == '.h':
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the <enum> element describing enum value `name`."""
        ident = self.idx.enums[name]
        output.write("    <enum name='%s' file='%s'" % (name,
                     self.modulename_file(ident.module)))
        if ident.info is not None:
            info = ident.info
            if info[0] is not None and info[0] != '':
                # NOTE(review): eval() runs text taken from the scanned C
                # sources; only use this tool on trusted source trees.
                try:
                    val = eval(info[0])
                except Exception:
                    val = info[0]
                output.write(" value='%s'" % (val))
            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element for `name` and index its descriptions."""
        ident = self.idx.macros[name]
        output.write("    <macro name='%s' file='%s'>\n" % (name,
                     self.modulename_file(ident.module)))
        if ident.info is not None:
            try:
                (args, desc) = ident.info
                if desc is not None and desc != "":
                    output.write("      <info>%s</info>\n" % (escape(desc)))
                    self.indexString(name, desc)
                for arg in args:
                    # distinct names: `name` must keep designating the
                    # macro so that the words are indexed under it
                    (arg_name, arg_desc) = arg
                    if arg_desc is not None and arg_desc != "":
                        output.write("      <arg name='%s' info='%s'/>\n" % (
                                     arg_name, escape(arg_desc)))
                        self.indexString(name, arg_desc)
                    else:
                        output.write("      <arg name='%s'/>\n" % (arg_name))
            except Exception:
                print("Failed to save macro %s info:  %s" % (
                      name, repr(ident.info)))
        output.write("    </macro>\n")

    def serialize_typedef(self, output, name):
        """Write the <struct> or <typedef> element for typedef `name`."""
        ident = self.idx.typedefs[name]
        if ident.info[0:7] == 'struct ':
            output.write("    <struct name='%s' file='%s' type='%s'" % (
                     name, self.modulename_file(ident.module), ident.info))
            struct_name = ident.info[7:]
            if struct_name in self.idx.structs and \
               isinstance(self.idx.structs[struct_name].info, (tuple, list)):
                output.write(">\n")
                try:
                    for field in self.idx.structs[struct_name].info:
                        desc = field[2]
                        self.indexString(struct_name, desc)
                        if desc is None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1], field[0], desc))
                except Exception:
                    print("Failed to serialize struct %s" % (struct_name))
                output.write("    </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write("    <typedef name='%s' file='%s' type='%s'/>\n" % (
                     name, self.modulename_file(ident.module), ident.info))

    def serialize_variable(self, output, name):
        """Write the <variable> element for `name`."""
        ident = self.idx.variables[name]
        if ident.info is not None:
            output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
                    name, self.modulename_file(ident.module), ident.info))
        else:
            output.write("    <variable name='%s' file='%s'/>\n" % (
                    name, self.modulename_file(ident.module)))

    def serialize_function(self, output, name):
        """Write the element for function `name` and index its descriptions.

        The element name is the identifier's own type.  Failures while
        writing the content are reported on stdout; the closing tag is
        written in any case.
        """
        ident = self.idx.functions[name]
        output.write("    <%s name='%s' file='%s'>\n" % (ident.type, name,
                     self.modulename_file(ident.module)))
        try:
            (ret, params, desc) = ident.info
            output.write("      <info>%s</info>\n" % (escape(desc)))
            self.indexString(name, desc)
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write("      <return type='void'/>\n")
                else:
                    output.write("      <return type='%s' info='%s'/>\n" % (
                             ret[0], escape(ret[1])))
                    self.indexString(name, ret[1])
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
                    self.indexString(name, param[2])
        except Exception:
            print("Failed to save function %s info:  %s" % (
                  name, repr(ident.info)))
        output.write("    </%s>\n" % (ident.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing the symbols exported by `file`.

        Symbols are listed by kind (macro, enum, typedef, struct, variable,
        function), sorted by name within each kind.
        """
        module = self.modulename_file(file)
        output.write("    <file name='%s'>\n" % (module))
        hdr = self.headers[file]
        if hdr.info is not None:
            for data in ('Summary', 'Description', 'Author'):
                try:
                    output.write("     <%s>%s</%s>\n" % (
                                 data.lower(),
                                 escape(hdr.info[data]),
                                 data.lower()))
                except Exception:
                    print("Header %s lacks a %s description" % (module, data))
            if 'Description' in hdr.info:
                desc = hdr.info['Description']
                if desc.find("DEPRECATED") != -1:
                    output.write("     <deprecated/>\n")

        # Dictionary keys are unique already; sorting them directly keeps
        # the output in a stable, alphabetical order.
        for symbol in sorted(hdr.macros.keys()):
            # Macros are sometime used to masquerade other types.
            if symbol in hdr.functions:
                continue
            if symbol in hdr.variables:
                continue
            if symbol in hdr.typedefs:
                continue
            if symbol in hdr.structs:
                continue
            if symbol in hdr.enums:
                continue
            output.write("     <exports symbol='%s' type='macro'/>\n" % (symbol))
        for symbol in sorted(hdr.enums.keys()):
            output.write("     <exports symbol='%s' type='enum'/>\n" % (symbol))
        for symbol in sorted(hdr.typedefs.keys()):
            output.write("     <exports symbol='%s' type='typedef'/>\n" % (symbol))
        for symbol in sorted(hdr.structs.keys()):
            output.write("     <exports symbol='%s' type='struct'/>\n" % (symbol))
        for symbol in sorted(hdr.variables.keys()):
            output.write("     <exports symbol='%s' type='variable'/>\n" % (symbol))
        for symbol in sorted(hdr.functions.keys()):
            output.write("     <exports symbol='%s' type='function'/>\n" % (symbol))
        output.write("    </file>\n")

    def serialize_xrefs_files(self, output):
        """Write, for each header, the sorted list of symbols it defines."""
        for file in sorted(self.headers.keys()):
            module = self.modulename_file(file)
            output.write("    <file name='%s'>\n" % (module))
            hdr = self.headers[file]
            ids = sorted(uniq(list(hdr.functions.keys()) +
                              list(hdr.variables.keys()) +
                              list(hdr.macros.keys()) +
                              list(hdr.typedefs.keys()) +
                              list(hdr.structs.keys()) +
                              list(hdr.enums.keys())))
            for symbol in ids:
                output.write("      <ref name='%s'/>\n" % (symbol))
            output.write("    </file>\n")

    def serialize_xrefs_functions(self, output):
        """Write, for each argument type, the functions taking that type."""
        funcs = {}
        for name in self.idx.functions.keys():
            ident = self.idx.functions[name]
            try:
                (ret, params, desc) = ident.info
                for param in params:
                    if param[0] == 'void':
                        continue
                    funcs.setdefault(param[0], []).append(name)
            except Exception:
                # best effort: functions without usable info are skipped
                pass
        for typename in sorted(funcs.keys()):
            if typename == '' or typename == 'void' or typename == "int" or \
               typename == "char *" or typename == "const char *":
                continue
            output.write("    <type name='%s'>\n" % (typename))
            ids = funcs[typename]
            ids.sort()
            pid = ''    # not sure why we have dups, but get rid of them!
            for symbol in ids:
                if symbol != pid:
                    output.write("      <ref name='%s'/>\n" % (symbol))
                    pid = symbol
            output.write("    </type>\n")

    def serialize_xrefs_constructors(self, output):
        """Write, for each return type, the functions returning that type."""
        funcs = {}
        for name in self.idx.functions.keys():
            ident = self.idx.functions[name]
            try:
                (ret, params, desc) = ident.info
                if ret[0] == "void":
                    continue
                funcs.setdefault(ret[0], []).append(name)
            except Exception:
                # best effort: functions without usable info are skipped
                pass
        for typename in sorted(funcs.keys()):
            if typename == '' or typename == 'void' or typename == "int" or \
               typename == "char *" or typename == "const char *":
                continue
            output.write("    <type name='%s'>\n" % (typename))
            ids = funcs[typename]
            ids.sort()
            for symbol in ids:
                output.write("      <ref name='%s'/>\n" % (symbol))
            output.write("    </type>\n")

    def serialize_xrefs_alpha(self, output):
        """Write all identifiers sorted and grouped by first character."""
        letter = None
        for symbol in sorted(self.idx.identifiers.keys()):
            if symbol[0] != letter:
                if letter is not None:
                    output.write("    </letter>\n")
                letter = symbol[0]
                output.write("    <letter name='%s'>\n" % (letter))
            output.write("      <ref name='%s'/>\n" % (symbol))
        if letter is not None:
            output.write("    </letter>\n")

    def serialize_xrefs_references(self, output):
        """Write, for each identifier, the HTML anchor documenting it."""
        for symbol in sorted(self.idx.identifiers.keys()):
            idf = self.idx.identifiers[symbol]
            module = idf.module
            output.write("    <reference name='%s' href='%s'/>\n" % (symbol,
                         'html/' + self.basename + '-' +
                         self.modulename_file(module) + '.html#' +
                         symbol))

    def serialize_xrefs_index(self, output):
        """Write the word index collected by indexString, split in chunks.

        Words referenced by more than 30 entries are left out.  A new chunk
        is started at a letter boundary once the current chunk holds more
        than 200 references; the list of chunks, with their first and last
        letters, is written at the end.
        """
        index = self.xref
        letter = None
        first_letter = None
        count = 0
        chunk = 0
        chunks = []
        for word in sorted(index.keys()):
            if len(index[word]) > 30:
                continue
            if word[0] != letter:
                if letter is None or count > 200:
                    if letter is not None:
                        output.write("      </letter>\n")
                        output.write("    </chunk>\n")
                        count = 0
                        chunks.append(["chunk%s" % (chunk - 1), first_letter, letter])
                    output.write("    <chunk name='chunk%s'>\n" % (chunk))
                    first_letter = word[0]
                    chunk = chunk + 1
                else:
                    output.write("      </letter>\n")
                letter = word[0]
                output.write("      <letter name='%s'>\n" % (letter))
            output.write("        <word name='%s'>\n" % (word))
            tokens = index[word]
            tokens.sort()
            tok = None
            for token in tokens:
                # the list is sorted: duplicates are adjacent
                if tok == token:
                    continue
                tok = token
                output.write("          <ref name='%s'/>\n" % (token))
                count = count + 1
            output.write("        </word>\n")
        if letter is not None:
            output.write("      </letter>\n")
            output.write("    </chunk>\n")
            if count != 0:
                chunks.append(["chunk%s" % (chunk - 1), first_letter, letter])
            output.write("    <chunks>\n")
            for ch in chunks:
                output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
                             ch[0], ch[1], ch[2]))
            output.write("    </chunks>\n")

    def serialize_xrefs(self, output):
        """Write all the cross-reference sections, in a fixed order."""
        output.write("  <references>\n")
        self.serialize_xrefs_references(output)
        output.write("  </references>\n")
        output.write("  <alpha>\n")
        self.serialize_xrefs_alpha(output)
        output.write("  </alpha>\n")
        output.write("  <constructors>\n")
        self.serialize_xrefs_constructors(output)
        output.write("  </constructors>\n")
        output.write("  <functions>\n")
        self.serialize_xrefs_functions(output)
        output.write("  </functions>\n")
        output.write("  <files>\n")
        self.serialize_xrefs_files(output)
        output.write("  </files>\n")
        output.write("  <index>\n")
        self.serialize_xrefs_index(output)
        output.write("  </index>\n")

    def serialize(self, outdir):
        """Write <name>-api.xml and <name>-refs.xml under `outdir`.

        `outdir` is used as a plain prefix and must therefore end with a
        path separator.  The API file has to be written first: writing it
        fills the word index that the cross-reference file dumps.
        """
        filename = outdir + "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        output = open(filename, "w")
        try:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write("  <files>\n")
            for file in sorted(self.headers.keys()):
                self.serialize_exports(output, file)
            output.write("  </files>\n")
            output.write("  <symbols>\n")
            for macro in sorted(self.idx.macros.keys()):
                self.serialize_macro(output, macro)
            for enum in sorted(self.idx.enums.keys()):
                self.serialize_enum(output, enum)
            for typedef in sorted(self.idx.typedefs.keys()):
                self.serialize_typedef(output, typedef)
            for variable in sorted(self.idx.variables.keys()):
                self.serialize_variable(output, variable)
            for function in sorted(self.idx.functions.keys()):
                self.serialize_function(output, function)
            output.write("  </symbols>\n")
            output.write("</api>\n")
        finally:
            # do not leak the handle if a serializer raises
            output.close()

        filename = outdir + "%s-refs.xml" % self.name
        print("Saving XML Cross References %s" % (filename))
        output = open(filename, "w")
        try:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<apirefs name='%s'>\n" % self.name)
            self.serialize_xrefs(output)
            output.write("</apirefs>\n")
        finally:
            output.close()
1876
1877
def rebuild():
    """Guess which project the script runs in and rebuild its API files.

    The project is recognised from the first marker file found, tried in
    a fixed order.  The API description is written to "./"; when the
    libexslt sources are also present, theirs is written to "EXSLT/".

    Returns the main docBuilder, or None when no project was recognised.
    """
    # (marker file, project name, directories to scan, files to exclude)
    candidates = (
        ("parser.c", "libxml2", [".", "."],
         ["xmlwin32version.h", "tst.c"]),
        ("../parser.c", "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c", "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )
    builder = None
    for marker, project, directories, excludes in candidates:
        if glob.glob(marker) != []:
            print("Rebuilding API description for " + project)
            builder = docBuilder(project, directories, excludes)
            break
    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize("./")
    if glob.glob("../libexslt/exslt.c") != []:
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize("EXSLT/")
    return builder
1904
1905#
1906# for debugging the parser
1907#
def parse(filename):
    """Parse a single file with CParser and return what its parse() gives."""
    return CParser(filename).parse()
1912
# Script entry point: rebuild the API description when run directly
# (nothing happens on import).
if __name__ == "__main__":
    rebuild()
1915