1#!/usr/bin/python -u 2# 3# This is the API builder, it parses the C sources and build the 4# API formal description in XML. 5# 6# See Copyright for the status of this software. 7# 8# daniel@veillard.com 9# 10import os, sys 11import string 12import glob 13 14debug=0 15 16# 17# C parser analysis code 18# 19ignored_files = { 20 "trio": "too many non standard macros", 21 "trio.c": "too many non standard macros", 22 "trionan.c": "too many non standard macros", 23 "triostr.c": "too many non standard macros", 24 "acconfig.h": "generated portability layer", 25 "config.h": "generated portability layer", 26 "libxml.h": "internal only", 27 "testOOM.c": "out of memory tester", 28 "testOOMlib.h": "out of memory tester", 29 "testOOMlib.c": "out of memory tester", 30} 31 32ignored_words = { 33 "WINAPI": (0, "Windows keyword"), 34 "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"), 35 "XMLPUBVAR": (0, "Special macro for extern vars for win32"), 36 "XSLTPUBVAR": (0, "Special macro for extern vars for win32"), 37 "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"), 38 "XMLPUBFUN": (0, "Special macro for extern funcs for win32"), 39 "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"), 40 "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"), 41 "XMLCALL": (0, "Special macro for win32 calls"), 42 "XSLTCALL": (0, "Special macro for win32 calls"), 43 "EXSLTCALL": (0, "Special macro for win32 calls"), 44 "__declspec": (3, "Windows keyword"), 45 "ATTRIBUTE_UNUSED": (0, "macro keyword"), 46 "LIBEXSLT_PUBLIC": (0, "macro keyword"), 47 "X_IN_Y": (5, "macro function builder"), 48 "XSLT_ITEM_COMMON_FIELDS": (0, "Special macro"), 49 "CALLBACK": (0, "macro keyword"), 50} 51 52def escape(raw): 53 raw = string.replace(raw, '&', '&') 54 raw = string.replace(raw, '<', '<') 55 raw = string.replace(raw, '>', '>') 56 raw = string.replace(raw, "'", ''') 57 raw = string.replace(raw, '"', '"') 58 return raw 59 60def uniq(items): 61 d = {} 62 for item in items: 63 d[item]=1 64 return d.keys() 65 66class identifier: 67 def __init__(self, name, module=None, type=None, lineno = 0, 68 info=None, extra=None): 69 self.name = name 70 self.module = module 71 self.type = type 72 self.info = info 73 self.extra = extra 74 self.lineno = lineno 75 self.static = 0 76 77 def __repr__(self): 78 r = "%s %s:" % (self.type, self.name) 79 if self.static: 80 r = r + " static" 81 if self.module != None: 82 r = r + " from %s" % (self.module) 83 if self.info != None: 84 r = r + " " + `self.info` 85 if self.extra != None: 86 r = r + " " + `self.extra` 87 return r 88 89 90 def set_module(self, module): 91 self.module = module 92 def set_type(self, type): 93 self.type = type 94 def set_info(self, info): 95 self.info = info 96 def set_extra(self, extra): 97 self.extra = extra 98 def set_lineno(self, lineno): 99 self.lineno = lineno 100 def set_static(self, static): 101 self.static = static 102 103 def get_name(self): 104 return self.name 105 def get_module(self): 106 return self.module 107 def get_type(self): 108 return self.type 109 def get_info(self): 110 return self.info 111 def get_lineno(self): 112 return self.lineno 113 def get_extra(self): 114 return self.extra 115 def get_static(self): 116 return self.static 117 118 def update(self, module, type = None, info = None, extra=None): 119 if module != None and self.module == None: 120 self.set_module(module) 121 if type != None and self.type == None: 122 self.set_type(type) 123 if info != None: 124 self.set_info(info) 125 if extra != None: 126 self.set_extra(extra) 127 128 129class index: 130 def __init__(self, name = "noname"): 131 self.name = name 132 self.identifiers = {} 133 self.functions = {} 134 self.variables = {} 135 self.includes = {} 136 self.structs = {} 137 self.enums = {} 138 self.typedefs = {} 139 self.macros = {} 140 self.references = {} 141 self.info = {} 142 143 def add_ref(self, name, module, static, type, lineno, info=None, extra=None): 144 if name[0:2] == '__': 145 return None 146 d = None 147 try: 148 d = self.identifiers[name] 149 d.update(module, type, lineno, info, extra) 150 except: 151 d = identifier(name, module, type, lineno, info, extra) 152 self.identifiers[name] = d 153 154 if d != None and static == 1: 155 d.set_static(1) 156 157 if d != None and name != None and type != None: 158 self.references[name] = d 159 160 def add(self, name, module, static, type, lineno, info=None, extra=None): 161 if name[0:2] == '__': 162 return None 163 d = None 164 try: 165 d = self.identifiers[name] 166 d.update(module, type, lineno, info, extra) 167 except: 168 d = identifier(name, module, type, lineno, info, extra) 169 self.identifiers[name] = d 170 171 if d != None and static == 1: 172 d.set_static(1) 173 174 if d != None and name != None and type != None: 175 if type == "function": 176 self.functions[name] = d 177 elif type == "functype": 178 self.functions[name] = d 179 elif type == "variable": 180 self.variables[name] = d 181 elif type == "include": 182 self.includes[name] = d 183 elif type == "struct": 184 self.structs[name] = d 185 elif type == "enum": 186 self.enums[name] = d 187 elif type == "typedef": 188 self.typedefs[name] = d 189 elif type == "macro": 190 self.macros[name] = d 191 else: 192 print "Unable to register type ", type 193 return d 194 195 def merge(self, idx): 196 for id in idx.functions.keys(): 197 # 198 # macro might be used to override functions or variables 199 # definitions 200 # 201 if self.macros.has_key(id): 202 del self.macros[id] 203 if self.functions.has_key(id): 204 print "function %s from %s redeclared in %s" % ( 205 id, self.functions[id].module, idx.functions[id].module) 206 else: 207 self.functions[id] = idx.functions[id] 208 self.identifiers[id] = idx.functions[id] 209 for id in idx.variables.keys(): 210 # 211 # macro might be used to override functions or variables 212 # definitions 213 # 214 if self.macros.has_key(id): 215 del self.macros[id] 216 if self.variables.has_key(id): 217 print "variable %s from %s redeclared in %s" % ( 218 id, self.variables[id].module, idx.variables[id].module) 219 else: 220 self.variables[id] = idx.variables[id] 221 self.identifiers[id] = idx.variables[id] 222 for id in idx.structs.keys(): 223 if self.structs.has_key(id): 224 print "struct %s from %s redeclared in %s" % ( 225 id, self.structs[id].module, idx.structs[id].module) 226 else: 227 self.structs[id] = idx.structs[id] 228 self.identifiers[id] = idx.structs[id] 229 for id in idx.typedefs.keys(): 230 if self.typedefs.has_key(id): 231 print "typedef %s from %s redeclared in %s" % ( 232 id, self.typedefs[id].module, idx.typedefs[id].module) 233 else: 234 self.typedefs[id] = idx.typedefs[id] 235 self.identifiers[id] = idx.typedefs[id] 236 for id in idx.macros.keys(): 237 # 238 # macro might be used to override functions or variables 239 # definitions 240 # 241 if self.variables.has_key(id): 242 continue 243 if self.functions.has_key(id): 244 continue 245 if self.enums.has_key(id): 246 continue 247 if self.macros.has_key(id): 248 print "macro %s from %s redeclared in %s" % ( 249 id, self.macros[id].module, idx.macros[id].module) 250 else: 251 self.macros[id] = idx.macros[id] 252 self.identifiers[id] = idx.macros[id] 253 for id in idx.enums.keys(): 254 if self.enums.has_key(id): 255 print "enum %s from %s redeclared in %s" % ( 256 id, self.enums[id].module, idx.enums[id].module) 257 else: 258 self.enums[id] = idx.enums[id] 259 self.identifiers[id] = idx.enums[id] 260 261 def merge_public(self, idx): 262 for id in idx.functions.keys(): 263 if self.functions.has_key(id): 264 up = idx.functions[id] 265 self.functions[id].update(None, up.type, up.info, up.extra) 266 # else: 267 # print "Function %s from %s is not declared in headers" % ( 268 # id, idx.functions[id].module) 269 # TODO: do the same for variables. 270 271 def analyze_dict(self, type, dict): 272 count = 0 273 public = 0 274 for name in dict.keys(): 275 id = dict[name] 276 count = count + 1 277 if id.static == 0: 278 public = public + 1 279 if count != public: 280 print " %d %s , %d public" % (count, type, public) 281 elif count != 0: 282 print " %d public %s" % (count, type) 283 284 285 def analyze(self): 286 self.analyze_dict("functions", self.functions) 287 self.analyze_dict("variables", self.variables) 288 self.analyze_dict("structs", self.structs) 289 self.analyze_dict("typedefs", self.typedefs) 290 self.analyze_dict("macros", self.macros) 291 292class CLexer: 293 """A lexer for the C language, tokenize the input by reading and 294 analyzing it line by line""" 295 def __init__(self, input): 296 self.input = input 297 self.tokens = [] 298 self.line = "" 299 self.lineno = 0 300 301 def getline(self): 302 line = '' 303 while line == '': 304 line = self.input.readline() 305 if not line: 306 return None 307 self.lineno = self.lineno + 1 308 line = string.lstrip(line) 309 line = string.rstrip(line) 310 if line == '': 311 continue 312 while line[-1] == '\\': 313 line = line[:-1] 314 n = self.input.readline() 315 self.lineno = self.lineno + 1 316 n = string.lstrip(n) 317 n = string.rstrip(n) 318 if not n: 319 break 320 else: 321 line = line + n 322 return line 323 324 def getlineno(self): 325 return self.lineno 326 327 def push(self, token): 328 self.tokens.insert(0, token); 329 330 def debug(self): 331 print "Last token: ", self.last 332 print "Token queue: ", self.tokens 333 print "Line %d end: " % (self.lineno), self.line 334 335 def token(self): 336 while self.tokens == []: 337 if self.line == "": 338 line = self.getline() 339 else: 340 line = self.line 341 self.line = "" 342 if line == None: 343 return None 344 345 if line[0] == '#': 346 self.tokens = map((lambda x: ('preproc', x)), 347 string.split(line)) 348 break; 349 l = len(line) 350 if line[0] == '"' or line[0] == "'": 351 end = line[0] 352 line = line[1:] 353 found = 0 354 tok = "" 355 while found == 0: 356 i = 0 357 l = len(line) 358 while i < l: 359 if line[i] == end: 360 self.line = line[i+1:] 361 line = line[:i] 362 l = i 363 found = 1 364 break 365 if line[i] == '\\': 366 i = i + 1 367 i = i + 1 368 tok = tok + line 369 if found == 0: 370 line = self.getline() 371 if line == None: 372 return None 373 self.last = ('string', tok) 374 return self.last 375 376 if l >= 2 and line[0] == '/' and line[1] == '*': 377 line = line[2:] 378 found = 0 379 tok = "" 380 while found == 0: 381 i = 0 382 l = len(line) 383 while i < l: 384 if line[i] == '*' and i+1 < l and line[i+1] == '/': 385 self.line = line[i+2:] 386 line = line[:i-1] 387 l = i 388 found = 1 389 break 390 i = i + 1 391 if tok != "": 392 tok = tok + "\n" 393 tok = tok + line 394 if found == 0: 395 line = self.getline() 396 if line == None: 397 return None 398 self.last = ('comment', tok) 399 return self.last 400 if l >= 2 and line[0] == '/' and line[1] == '/': 401 line = line[2:] 402 self.last = ('comment', line) 403 return self.last 404 i = 0 405 while i < l: 406 if line[i] == '/' and i+1 < l and line[i+1] == '/': 407 self.line = line[i:] 408 line = line[:i] 409 break 410 if line[i] == '/' and i+1 < l and line[i+1] == '*': 411 self.line = line[i:] 412 line = line[:i] 413 break 414 if line[i] == '"' or line[i] == "'": 415 self.line = line[i:] 416 line = line[:i] 417 break 418 i = i + 1 419 l = len(line) 420 i = 0 421 while i < l: 422 if line[i] == ' ' or line[i] == '\t': 423 i = i + 1 424 continue 425 o = ord(line[i]) 426 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 427 (o >= 48 and o <= 57): 428 s = i 429 while i < l: 430 o = ord(line[i]) 431 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 432 (o >= 48 and o <= 57) or string.find( 433 " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1: 434 i = i + 1 435 else: 436 break 437 self.tokens.append(('name', line[s:i])) 438 continue 439 if string.find("(){}:;,[]", line[i]) != -1: 440# if line[i] == '(' or line[i] == ')' or line[i] == '{' or \ 441# line[i] == '}' or line[i] == ':' or line[i] == ';' or \ 442# line[i] == ',' or line[i] == '[' or line[i] == ']': 443 self.tokens.append(('sep', line[i])) 444 i = i + 1 445 continue 446 if string.find("+-*><=/%&!|.", line[i]) != -1: 447# if line[i] == '+' or line[i] == '-' or line[i] == '*' or \ 448# line[i] == '>' or line[i] == '<' or line[i] == '=' or \ 449# line[i] == '/' or line[i] == '%' or line[i] == '&' or \ 450# line[i] == '!' or line[i] == '|' or line[i] == '.': 451 if line[i] == '.' and i + 2 < l and \ 452 line[i+1] == '.' and line[i+2] == '.': 453 self.tokens.append(('name', '...')) 454 i = i + 3 455 continue 456 457 j = i + 1 458 if j < l and ( 459 string.find("+-*><=/%&!|", line[j]) != -1): 460# line[j] == '+' or line[j] == '-' or line[j] == '*' or \ 461# line[j] == '>' or line[j] == '<' or line[j] == '=' or \ 462# line[j] == '/' or line[j] == '%' or line[j] == '&' or \ 463# line[j] == '!' or line[j] == '|'): 464 self.tokens.append(('op', line[i:j+1])) 465 i = j + 1 466 else: 467 self.tokens.append(('op', line[i])) 468 i = i + 1 469 continue 470 s = i 471 while i < l: 472 o = ord(line[i]) 473 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 474 (o >= 48 and o <= 57) or ( 475 string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1): 476# line[i] != ' ' and line[i] != '\t' and 477# line[i] != '(' and line[i] != ')' and 478# line[i] != '{' and line[i] != '}' and 479# line[i] != ':' and line[i] != ';' and 480# line[i] != ',' and line[i] != '+' and 481# line[i] != '-' and line[i] != '*' and 482# line[i] != '/' and line[i] != '%' and 483# line[i] != '&' and line[i] != '!' and 484# line[i] != '|' and line[i] != '[' and 485# line[i] != ']' and line[i] != '=' and 486# line[i] != '*' and line[i] != '>' and 487# line[i] != '<'): 488 i = i + 1 489 else: 490 break 491 self.tokens.append(('name', line[s:i])) 492 493 tok = self.tokens[0] 494 self.tokens = self.tokens[1:] 495 self.last = tok 496 return tok 497 498class CParser: 499 """The C module parser""" 500 def __init__(self, filename, idx = None): 501 self.filename = filename 502 if len(filename) > 2 and filename[-2:] == '.h': 503 self.is_header = 1 504 else: 505 self.is_header = 0 506 self.input = open(filename) 507 self.lexer = CLexer(self.input) 508 if idx == None: 509 self.index = index() 510 else: 511 self.index = idx 512 self.top_comment = "" 513 self.last_comment = "" 514 self.comment = None 515 self.collect_ref = 0 516 self.no_error = 0 517 518 def collect_references(self): 519 self.collect_ref = 1 520 521 def stop_error(self): 522 self.no_error = 1 523 524 def start_error(self): 525 self.no_error = 0 526 527 def lineno(self): 528 return self.lexer.getlineno() 529 530 def index_add(self, name, module, static, type, info=None, extra = None): 531 self.index.add(name, module, static, type, self.lineno(), 532 info, extra) 533 534 def index_add_ref(self, name, module, static, type, info=None, 535 extra = None): 536 self.index.add_ref(name, module, static, type, self.lineno(), 537 info, extra) 538 539 def warning(self, msg): 540 if self.no_error: 541 return 542 print msg 543 544 def error(self, msg, token=-1): 545 if self.no_error: 546 return 547 548 print "Parse Error: " + msg 549 if token != -1: 550 print "Got token ", token 551 self.lexer.debug() 552 sys.exit(1) 553 554 def debug(self, msg, token=-1): 555 print "Debug: " + msg 556 if token != -1: 557 print "Got token ", token 558 self.lexer.debug() 559 560 def parseTopComment(self, comment): 561 res = {} 562 lines = string.split(comment, "\n") 563 item = None 564 for line in lines: 565 while line != "" and (line[0] == ' ' or line[0] == '\t'): 566 line = line[1:] 567 while line != "" and line[0] == '*': 568 line = line[1:] 569 while line != "" and (line[0] == ' ' or line[0] == '\t'): 570 line = line[1:] 571 try: 572 (it, line) = string.split(line, ":", 1) 573 item = it 574 while line != "" and (line[0] == ' ' or line[0] == '\t'): 575 line = line[1:] 576 if res.has_key(item): 577 res[item] = res[item] + " " + line 578 else: 579 res[item] = line 580 except: 581 if item != None: 582 if res.has_key(item): 583 res[item] = res[item] + " " + line 584 else: 585 res[item] = line 586 self.index.info = res 587 588 def parseComment(self, token): 589 if self.top_comment == "": 590 self.top_comment = token[1] 591 if self.comment == None or token[1][0] == '*': 592 self.comment = token[1]; 593 else: 594 self.comment = self.comment + token[1] 595 token = self.lexer.token() 596 597 if string.find(self.comment, "DOC_DISABLE") != -1: 598 self.stop_error() 599 600 if string.find(self.comment, "DOC_ENABLE") != -1: 601 self.start_error() 602 603 return token 604 605 # 606 # Parse a comment block associate to a macro 607 # 608 def parseMacroComment(self, name, quiet = 0): 609 if name[0:2] == '__': 610 quiet = 1 611 612 args = [] 613 desc = "" 614 615 if self.comment == None: 616 if not quiet: 617 self.warning("Missing comment for macro %s" % (name)) 618 return((args, desc)) 619 if self.comment[0] != '*': 620 if not quiet: 621 self.warning("Missing * in macro comment for %s" % (name)) 622 return((args, desc)) 623 lines = string.split(self.comment, '\n') 624 if lines[0] == '*': 625 del lines[0] 626 if lines[0] != "* %s:" % (name): 627 if not quiet: 628 self.warning("Misformatted macro comment for %s" % (name)) 629 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 630 return((args, desc)) 631 del lines[0] 632 while lines[0] == '*': 633 del lines[0] 634 while len(lines) > 0 and lines[0][0:3] == '* @': 635 l = lines[0][3:] 636 try: 637 (arg, desc) = string.split(l, ':', 1) 638 desc=string.strip(desc) 639 arg=string.strip(arg) 640 except: 641 if not quiet: 642 self.warning("Misformatted macro comment for %s" % (name)) 643 self.warning(" problem with '%s'" % (lines[0])) 644 del lines[0] 645 continue 646 del lines[0] 647 l = string.strip(lines[0]) 648 while len(l) > 2 and l[0:3] != '* @': 649 while l[0] == '*': 650 l = l[1:] 651 desc = desc + ' ' + string.strip(l) 652 del lines[0] 653 if len(lines) == 0: 654 break 655 l = lines[0] 656 args.append((arg, desc)) 657 while len(lines) > 0 and lines[0] == '*': 658 del lines[0] 659 desc = "" 660 while len(lines) > 0: 661 l = lines[0] 662 while len(l) > 0 and l[0] == '*': 663 l = l[1:] 664 l = string.strip(l) 665 desc = desc + " " + l 666 del lines[0] 667 668 desc = string.strip(desc) 669 670 if quiet == 0: 671 if desc == "": 672 self.warning("Macro comment for %s lack description of the macro" % (name)) 673 674 return((args, desc)) 675 676 # 677 # Parse a comment block and merge the informations found in the 678 # parameters descriptions, finally returns a block as complete 679 # as possible 680 # 681 def mergeFunctionComment(self, name, description, quiet = 0): 682 if name == 'main': 683 quiet = 1 684 if name[0:2] == '__': 685 quiet = 1 686 687 (ret, args) = description 688 desc = "" 689 retdesc = "" 690 691 if self.comment == None: 692 if not quiet: 693 self.warning("Missing comment for function %s" % (name)) 694 return(((ret[0], retdesc), args, desc)) 695 if self.comment[0] != '*': 696 if not quiet: 697 self.warning("Missing * in function comment for %s" % (name)) 698 return(((ret[0], retdesc), args, desc)) 699 lines = string.split(self.comment, '\n') 700 if lines[0] == '*': 701 del lines[0] 702 if lines[0] != "* %s:" % (name): 703 if not quiet: 704 self.warning("Misformatted function comment for %s" % (name)) 705 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 706 return(((ret[0], retdesc), args, desc)) 707 del lines[0] 708 while len(lines) > 0 and lines[0] == '*': 709 del lines[0] 710 nbargs = len(args) 711 while len(lines) > 0 and lines[0][0:3] == '* @': 712 l = lines[0][3:] 713 try: 714 (arg, desc) = string.split(l, ':', 1) 715 desc=string.strip(desc) 716 arg=string.strip(arg) 717 except: 718 if not quiet: 719 self.warning("Misformatted function comment for %s" % (name)) 720 self.warning(" problem with '%s'" % (lines[0])) 721 del lines[0] 722 continue 723 del lines[0] 724 l = string.strip(lines[0]) 725 while len(l) > 2 and l[0:3] != '* @': 726 while l[0] == '*': 727 l = l[1:] 728 desc = desc + ' ' + string.strip(l) 729 del lines[0] 730 if len(lines) == 0: 731 break 732 l = lines[0] 733 i = 0 734 while i < nbargs: 735 if args[i][1] == arg: 736 args[i] = (args[i][0], arg, desc) 737 break; 738 i = i + 1 739 if i >= nbargs: 740 if not quiet: 741 self.warning("Unable to find arg %s from function comment for %s" % ( 742 arg, name)) 743 while len(lines) > 0 and lines[0] == '*': 744 del lines[0] 745 desc = "" 746 while len(lines) > 0: 747 l = lines[0] 748 while len(l) > 0 and l[0] == '*': 749 l = l[1:] 750 l = string.strip(l) 751 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return": 752 try: 753 l = string.split(l, ' ', 1)[1] 754 except: 755 l = "" 756 retdesc = string.strip(l) 757 del lines[0] 758 while len(lines) > 0: 759 l = lines[0] 760 while len(l) > 0 and l[0] == '*': 761 l = l[1:] 762 l = string.strip(l) 763 retdesc = retdesc + " " + l 764 del lines[0] 765 else: 766 desc = desc + " " + l 767 del lines[0] 768 769 retdesc = string.strip(retdesc) 770 desc = string.strip(desc) 771 772 if quiet == 0: 773 # 774 # report missing comments 775 # 776 i = 0 777 while i < nbargs: 778 if args[i][2] == None and args[i][0] != "void" and args[i][1] != None: 779 self.warning("Function comment for %s lack description of arg %s" % (name, args[i][1])) 780 i = i + 1 781 if retdesc == "" and ret[0] != "void": 782 self.warning("Function comment for %s lack description of return value" % (name)) 783 if desc == "": 784 self.warning("Function comment for %s lack description of the function" % (name)) 785 786 787 return(((ret[0], retdesc), args, desc)) 788 789 def parsePreproc(self, token): 790 name = token[1] 791 if name == "#include": 792 token = self.lexer.token() 793 if token == None: 794 return None 795 if token[0] == 'preproc': 796 self.index_add(token[1], self.filename, not self.is_header, 797 "include") 798 return self.lexer.token() 799 return token 800 if name == "#define": 801 token = self.lexer.token() 802 if token == None: 803 return None 804 if token[0] == 'preproc': 805 # TODO macros with arguments 806 name = token[1] 807 lst = [] 808 token = self.lexer.token() 809 while token != None and token[0] == 'preproc' and \ 810 token[1][0] != '#': 811 lst.append(token[1]) 812 token = self.lexer.token() 813 try: 814 name = string.split(name, '(') [0] 815 except: 816 pass 817 info = self.parseMacroComment(name, not self.is_header) 818 self.index_add(name, self.filename, not self.is_header, 819 "macro", info) 820 return token 821 token = self.lexer.token() 822 while token != None and token[0] == 'preproc' and \ 823 token[1][0] != '#': 824 token = self.lexer.token() 825 return token 826 827 # 828 # token acquisition on top of the lexer, it handle internally 829 # preprocessor and comments since they are logically not part of 830 # the program structure. 831 # 832 def token(self): 833 global ignored_words 834 835 token = self.lexer.token() 836 while token != None: 837 if token[0] == 'comment': 838 token = self.parseComment(token) 839 continue 840 elif token[0] == 'preproc': 841 token = self.parsePreproc(token) 842 continue 843 elif token[0] == "name" and ignored_words.has_key(token[1]): 844 (n, info) = ignored_words[token[1]] 845 i = 0 846 while i < n: 847 token = self.lexer.token() 848 i = i + 1 849 token = self.lexer.token() 850 continue 851 else: 852 if debug: 853 print "=> ", token 854 return token 855 return None 856 857 # 858 # Parse a typedef, it records the type and its name. 859 # 860 def parseTypedef(self, token): 861 if token == None: 862 return None 863 token = self.parseType(token) 864 if token == None: 865 self.error("parsing typedef") 866 return None 867 base_type = self.type 868 type = base_type 869 #self.debug("end typedef type", token) 870 while token != None: 871 if token[0] == "name": 872 name = token[1] 873 signature = self.signature 874 if signature != None: 875 type = string.split(type, '(')[0] 876 d = self.mergeFunctionComment(name, 877 ((type, None), signature), 1) 878 self.index_add(name, self.filename, not self.is_header, 879 "functype", d) 880 else: 881 if base_type == "struct": 882 self.index_add(name, self.filename, not self.is_header, 883 "struct", type) 884 base_type = "struct " + name 885 else: 886 self.index_add(name, self.filename, not self.is_header, 887 "typedef", type) 888 token = self.token() 889 else: 890 self.error("parsing typedef: expecting a name") 891 return token 892 #self.debug("end typedef", token) 893 if token != None and token[0] == 'sep' and token[1] == ',': 894 type = base_type 895 token = self.token() 896 while token != None and token[0] == "op": 897 type = type + token[1] 898 token = self.token() 899 elif token != None and token[0] == 'sep' and token[1] == ';': 900 break; 901 elif token != None and token[0] == 'name': 902 type = base_type 903 continue; 904 else: 905 self.error("parsing typedef: expecting ';'", token) 906 return token 907 token = self.token() 908 return token 909 910 # 911 # Parse a C code block, used for functions it parse till 912 # the balancing } included 913 # 914 def parseBlock(self, token): 915 while token != None: 916 if token[0] == "sep" and token[1] == "{": 917 token = self.token() 918 token = self.parseBlock(token) 919 elif token[0] == "sep" and token[1] == "}": 920 self.comment = None 921 token = self.token() 922 return token 923 else: 924 if self.collect_ref == 1: 925 oldtok = token 926 token = self.token() 927 if oldtok[0] == "name" and oldtok[1][0:3] == "xml": 928 if token[0] == "sep" and token[1] == "(": 929 self.index_add_ref(oldtok[1], self.filename, 930 0, "function") 931 token = self.token() 932 elif token[0] == "name": 933 token = self.token() 934 if token[0] == "sep" and (token[1] == ";" or 935 token[1] == "," or token[1] == "="): 936 self.index_add_ref(oldtok[1], self.filename, 937 0, "type") 938 elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_": 939 self.index_add_ref(oldtok[1], self.filename, 940 0, "typedef") 941 elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_": 942 self.index_add_ref(oldtok[1], self.filename, 943 0, "typedef") 944 945 else: 946 token = self.token() 947 return token 948 949 # 950 # Parse a C struct definition till the balancing } 951 # 952 def parseStruct(self, token): 953 fields = [] 954 #self.debug("start parseStruct", token) 955 while token != None: 956 if token[0] == "sep" and token[1] == "{": 957 token = self.token() 958 token = self.parseTypeBlock(token) 959 elif token[0] == "sep" and token[1] == "}": 960 self.struct_fields = fields 961 #self.debug("end parseStruct", token) 962 #print fields 963 token = self.token() 964 return token 965 else: 966 base_type = self.type 967 #self.debug("before parseType", token) 968 token = self.parseType(token) 969 #self.debug("after parseType", token) 970 if token != None and token[0] == "name": 971 fname = token[1] 972 token = self.token() 973 if token[0] == "sep" and token[1] == ";": 974 self.comment = None 975 token = self.token() 976 fields.append((self.type, fname, self.comment)) 977 self.comment = None 978 else: 979 self.error("parseStruct: expecting ;", token) 980 elif token != None and token[0] == "sep" and token[1] == "{": 981 token = self.token() 982 token = self.parseTypeBlock(token) 983 if token != None and token[0] == "name": 984 token = self.token() 985 if token != None and token[0] == "sep" and token[1] == ";": 986 token = self.token() 987 else: 988 self.error("parseStruct: expecting ;", token) 989 else: 990 self.error("parseStruct: name", token) 991 token = self.token() 992 self.type = base_type; 993 self.struct_fields = fields 994 #self.debug("end parseStruct", token) 995 #print fields 996 return token 997 998 # 999 # Parse a C enum block, parse till the balancing } 1000 # 1001 def parseEnumBlock(self, token): 1002 self.enums = [] 1003 name = None 1004 self.comment = None 1005 comment = "" 1006 value = "0" 1007 while token != None: 1008 if token[0] == "sep" and token[1] == "{": 1009 token = self.token() 1010 token = self.parseTypeBlock(token) 1011 elif token[0] == "sep" and token[1] == "}": 1012 if name != None: 1013 if self.comment != None: 1014 comment = self.comment 1015 self.comment = None 1016 self.enums.append((name, value, comment)) 1017 token = self.token() 1018 return token 1019 elif token[0] == "name": 1020 if name != None: 1021 if self.comment != None: 1022 comment = string.strip(self.comment) 1023 self.comment = None 1024 self.enums.append((name, value, comment)) 1025 name = token[1] 1026 comment = "" 1027 token = self.token() 1028 if token[0] == "op" and token[1][0] == "=": 1029 value = "" 1030 if len(token[1]) > 1: 1031 value = token[1][1:] 1032 token = self.token() 1033 while token[0] != "sep" or (token[1] != ',' and 1034 token[1] != '}'): 1035 value = value + token[1] 1036 token = self.token() 1037 else: 1038 try: 1039 value = "%d" % (int(value) + 1) 1040 except: 1041 self.warning("Failed to compute value of enum %s" % (name)) 1042 value="" 1043 if token[0] == "sep" and token[1] == ",": 1044 token = self.token() 1045 else: 1046 token = self.token() 1047 return token 1048 1049 # 1050 # Parse a C definition block, used for structs it parse till 1051 # the balancing } 1052 # 1053 def parseTypeBlock(self, token): 1054 while token != None: 1055 if token[0] == "sep" and token[1] == "{": 1056 token = self.token() 1057 token = self.parseTypeBlock(token) 1058 elif token[0] == "sep" and token[1] == "}": 1059 token = self.token() 1060 return token 1061 else: 1062 token = self.token() 1063 return token 1064 1065 # 1066 # Parse a type: the fact that the type name can either occur after 1067 # the definition or within the definition makes it a little harder 1068 # if inside, the name token is pushed back before returning 1069 # 1070 def parseType(self, token): 1071 self.type = "" 1072 self.struct_fields = [] 1073 self.signature = None 1074 if token == None: 1075 return token 1076 1077 while token[0] == "name" and ( 1078 token[1] == "const" or token[1] == "unsigned" or 1079 token[1] == "signed"): 1080 if self.type == "": 1081 self.type = token[1] 1082 else: 1083 self.type = self.type + " " + token[1] 1084 token = self.token() 1085 1086 if token[0] == "name" and (token[1] == "long" or token[1] == "short"): 1087 if self.type == "": 1088 self.type = token[1] 1089 else: 1090 self.type = self.type + " " + token[1] 1091 if token[0] == "name" and token[1] == "int": 1092 if self.type == "": 1093 self.type = tmp[1] 1094 else: 1095 self.type = self.type + " " + tmp[1] 1096 1097 elif token[0] == "name" and token[1] == "struct": 1098 if self.type == "": 1099 self.type = token[1] 1100 else: 1101 self.type = self.type + " " + token[1] 1102 token = self.token() 1103 nametok = None 1104 if token[0] == "name": 1105 nametok = token 1106 token = self.token() 1107 if token != None and token[0] == "sep" and token[1] == "{": 1108 token = self.token() 1109 token = self.parseStruct(token) 1110 elif token != None and token[0] == "op" and token[1] == "*": 1111 self.type = self.type + " " + nametok[1] + " *" 1112 token = self.token() 1113 while token != None and token[0] == "op" and token[1] == "*": 1114 self.type = self.type + " *" 1115 token = self.token() 1116 if token[0] == "name": 1117 nametok = token 1118 token = self.token() 1119 else: 1120 self.error("struct : expecting name", token) 1121 return token 1122 elif token != None and token[0] == "name" and nametok != None: 1123 self.type = self.type + " " + nametok[1] 1124 return token 1125 1126 if nametok != None: 1127 self.lexer.push(token) 1128 token = nametok 1129 return token 1130 1131 elif token[0] == "name" and token[1] == "enum": 1132 if self.type == "": 1133 self.type = token[1] 1134 else: 1135 self.type = self.type + " " + token[1] 1136 self.enums = [] 1137 token = self.token() 1138 if token != None and token[0] == "sep" and token[1] == "{": 1139 token = self.token() 1140 token = self.parseEnumBlock(token) 1141 else: 1142 self.error("parsing enum: expecting '{'", token) 1143 enum_type = None 1144 if token != None and token[0] != "name": 1145 self.lexer.push(token) 1146 token = ("name", "enum") 1147 else: 1148 enum_type = token[1] 1149 for enum in self.enums: 1150 self.index_add(enum[0], self.filename, 1151 not self.is_header, "enum", 1152 (enum[1], enum[2], enum_type)) 1153 return token 1154 1155 elif token[0] == "name": 1156 if self.type == "": 1157 self.type = token[1] 1158 else: 1159 self.type = self.type + " " + token[1] 1160 else: 1161 self.error("parsing type %s: expecting a name" % (self.type), 1162 token) 1163 return token 1164 token = self.token() 1165 while token != None and (token[0] == "op" or 1166 token[0] == "name" and token[1] == "const"): 1167 self.type = self.type + " " + token[1] 1168 token = self.token() 1169 1170 # 1171 # if there is a parenthesis here, this means a function type 1172 # 1173 if token != None and token[0] == "sep" and token[1] == '(': 1174 self.type = self.type + token[1] 1175 token = self.token() 1176 while token != None and token[0] == "op" and token[1] == '*': 1177 self.type = self.type + token[1] 1178 token = self.token() 1179 if token == None or token[0] != "name" : 1180 self.error("parsing function type, name expected", token); 1181 return token 1182 self.type = self.type + token[1] 1183 nametok = token 1184 token = self.token() 1185 if token != None and token[0] == "sep" and token[1] == ')': 1186 self.type = self.type + token[1] 1187 token = self.token() 1188 if token != None and token[0] == "sep" and token[1] == '(': 1189 token = self.token() 1190 type = self.type; 1191 token = self.parseSignature(token); 1192 self.type = type; 1193 else: 1194 self.error("parsing function type, '(' expected", token); 1195 return token 1196 else: 1197 self.error("parsing function type, ')' expected", token); 1198 return token 1199 self.lexer.push(token) 1200 token = nametok 1201 return token 1202 1203 # 1204 # do some lookahead for arrays 1205 # 1206 if token != None and token[0] == "name": 1207 nametok = token 1208 token = self.token() 1209 if token != None and token[0] == "sep" and token[1] == '[': 1210 self.type = self.type + nametok[1] 1211 while token != None and token[0] == "sep" and token[1] == '[': 1212 self.type = self.type + token[1] 1213 token = self.token() 1214 while token != None and token[0] != 'sep' and \ 1215 token[1] != ']' and token[1] != ';': 1216 self.type = self.type + token[1] 1217 token = self.token() 1218 if token != None and token[0] == 'sep' and token[1] == ']': 1219 self.type = self.type + token[1] 1220 token = self.token() 1221 else: 1222 self.error("parsing array type, ']' expected", token); 1223 return token 1224 elif token != None and token[0] == "sep" and token[1] == ':': 1225 # remove :12 in case it's a limited int size 1226 token = self.token() 1227 token = self.token() 1228 self.lexer.push(token) 1229 token = nametok 1230 1231 return token 1232 1233 # 1234 # Parse a signature: '(' has been parsed and we scan the type definition 1235 # up to the ')' included 1236 def parseSignature(self, token): 1237 signature = [] 1238 if token != None and token[0] == "sep" and token[1] == ')': 1239 self.signature = [] 1240 token = self.token() 1241 return token 1242 while token != None: 1243 token = self.parseType(token) 1244 if token != None and token[0] == "name": 1245 signature.append((self.type, token[1], None)) 1246 token = self.token() 1247 elif token != None and token[0] == "sep" and token[1] == ',': 1248 token = self.token() 1249 continue 1250 elif token != None and token[0] == "sep" and token[1] == ')': 1251 # only the type was provided 1252 if self.type == "...": 1253 signature.append((self.type, "...", None)) 1254 else: 1255 signature.append((self.type, None, None)) 1256 if token != None and token[0] == "sep": 1257 if token[1] == ',': 1258 token = self.token() 1259 continue 1260 elif token[1] == ')': 1261 token = self.token() 1262 break 1263 self.signature = signature 1264 return token 1265 1266 # 1267 # Parse a global definition, be it a type, variable or function 1268 # the extern "C" blocks are a bit nasty and require it to recurse. 1269 # 1270 def parseGlobal(self, token): 1271 static = 0 1272 if token[1] == 'extern': 1273 token = self.token() 1274 if token == None: 1275 return token 1276 if token[0] == 'string': 1277 if token[1] == 'C': 1278 token = self.token() 1279 if token == None: 1280 return token 1281 if token[0] == 'sep' and token[1] == "{": 1282 token = self.token() 1283# print 'Entering extern "C line ', self.lineno() 1284 while token != None and (token[0] != 'sep' or 1285 token[1] != "}"): 1286 if token[0] == 'name': 1287 token = self.parseGlobal(token) 1288 else: 1289 self.error( 1290 "token %s %s unexpected at the top level" % ( 1291 token[0], token[1])) 1292 token = self.parseGlobal(token) 1293# print 'Exiting extern "C" line', self.lineno() 1294 token = self.token() 1295 return token 1296 else: 1297 return token 1298 elif token[1] == 'static': 1299 static = 1 1300 token = self.token() 1301 if token == None or token[0] != 'name': 1302 return token 1303 1304 if token[1] == 'typedef': 1305 token = self.token() 1306 return self.parseTypedef(token) 1307 else: 1308 token = self.parseType(token) 1309 type_orig = self.type 1310 if token == None or token[0] != "name": 1311 return token 1312 type = type_orig 1313 self.name = token[1] 1314 token = self.token() 1315 while token != None and (token[0] == "sep" or token[0] == "op"): 1316 if token[0] == "sep": 1317 if token[1] == "[": 1318 type = type + token[1] 1319 token = self.token() 1320 while token != None and (token[0] != "sep" or \ 1321 token[1] != ";"): 1322 type = type + token[1] 1323 token = self.token() 1324 1325 if token != None and token[0] == "op" and token[1] == "=": 1326 # 1327 # Skip the initialization of the variable 1328 # 1329 token = self.token() 1330 if token[0] == 'sep' and token[1] == '{': 1331 token = self.token() 1332 token = self.parseBlock(token) 1333 else: 1334 self.comment = None 1335 while token != None and (token[0] != "sep" or \ 1336 (token[1] != ';' and token[1] != ',')): 1337 token = self.token() 1338 self.comment = None 1339 if token == None or token[0] != "sep" or (token[1] != ';' and 1340 token[1] != ','): 1341 self.error("missing ';' or ',' after value") 1342 1343 if token != None and token[0] == "sep": 1344 if token[1] == ";": 1345 self.comment = None 1346 token = self.token() 1347 if type == "struct": 1348 self.index_add(self.name, self.filename, 1349 not self.is_header, "struct", self.struct_fields) 1350 else: 1351 self.index_add(self.name, self.filename, 1352 not self.is_header, "variable", type) 1353 break 1354 elif token[1] == "(": 1355 token = self.token() 1356 token = self.parseSignature(token) 1357 if token == None: 1358 return None 1359 if token[0] == "sep" and token[1] == ";": 1360 d = self.mergeFunctionComment(self.name, 1361 ((type, None), self.signature), 1) 1362 self.index_add(self.name, self.filename, static, 1363 "function", d) 1364 token = self.token() 1365 elif token[0] == "sep" and token[1] == "{": 1366 d = self.mergeFunctionComment(self.name, 1367 ((type, None), self.signature), static) 1368 self.index_add(self.name, self.filename, static, 1369 "function", d) 1370 token = self.token() 1371 token = self.parseBlock(token); 1372 elif token[1] == ',': 1373 self.comment = None 1374 self.index_add(self.name, self.filename, static, 1375 "variable", type) 1376 type = type_orig 1377 token = self.token() 1378 while token != None and token[0] == "sep": 1379 type = type + token[1] 1380 token = self.token() 1381 if token != None and token[0] == "name": 1382 self.name = token[1] 1383 token = self.token() 1384 else: 1385 break 1386 1387 return token 1388 1389 def parse(self): 1390 self.warning("Parsing %s" % (self.filename)) 1391 token = self.token() 1392 while token != None: 1393 if token[0] == 'name': 1394 token = self.parseGlobal(token) 1395 else: 1396 self.error("token %s %s unexpected at the top level" % ( 1397 token[0], token[1])) 1398 token = self.parseGlobal(token) 1399 return 1400 self.parseTopComment(self.top_comment) 1401 return self.index 1402 1403 1404class docBuilder: 1405 """A documentation builder""" 1406 def __init__(self, name, directories=['.'], excludes=[]): 1407 self.name = name 1408 self.directories = directories 1409 self.excludes = excludes + ignored_files.keys() 1410 self.modules = {} 1411 self.headers = {} 1412 self.idx = index() 1413 self.xref = {} 1414 self.index = {} 1415 if name == 'libxml2': 1416 self.basename = 'libxml' 1417 else: 1418 self.basename = name 1419 1420 def indexString(self, id, str): 1421 if str == None: 1422 return 1423 str = string.replace(str, "'", ' ') 1424 str = string.replace(str, '"', ' ') 1425 str = string.replace(str, "/", ' ') 1426 str = string.replace(str, '*', ' ') 1427 str = string.replace(str, "[", ' ') 1428 str = string.replace(str, "]", ' ') 1429 str = string.replace(str, "(", ' ') 1430 str = string.replace(str, ")", ' ') 1431 str = string.replace(str, "<", ' ') 1432 str = string.replace(str, '>', ' ') 1433 str = string.replace(str, "&", ' ') 1434 str = string.replace(str, '#', ' ') 1435 str = string.replace(str, ",", ' ') 1436 str = string.replace(str, '.', ' ') 1437 str = string.replace(str, ';', ' ') 1438 tokens = string.split(str) 1439 for token in tokens: 1440 try: 1441 c = token[0] 1442 if string.find(string.letters, c) < 0: 1443 pass 1444 elif len(token) < 3: 1445 pass 1446 else: 1447 lower = string.lower(token) 1448 # TODO: generalize this a bit 1449 if lower == 'and' or lower == 'the': 1450 pass 1451 elif self.xref.has_key(token): 1452 self.xref[token].append(id) 1453 else: 1454 self.xref[token] = [id] 1455 except: 1456 pass 1457 1458 def analyze(self): 1459 print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys())) 1460 self.idx.analyze() 1461 1462 def scanHeaders(self): 1463 for header in self.headers.keys(): 1464 parser = CParser(header) 1465 idx = parser.parse() 1466 self.headers[header] = idx; 1467 self.idx.merge(idx) 1468 1469 def scanModules(self): 1470 for module in self.modules.keys(): 1471 parser = CParser(module) 1472 idx = parser.parse() 1473 # idx.analyze() 1474 self.modules[module] = idx 1475 self.idx.merge_public(idx) 1476 1477 def scan(self): 1478 for directory in self.directories: 1479 files = glob.glob(directory + "/*.c") 1480 for file in files: 1481 skip = 0 1482 for excl in self.excludes: 1483 if string.find(file, excl) != -1: 1484 skip = 1; 1485 break 1486 if skip == 0: 1487 self.modules[file] = None; 1488 files = glob.glob(directory + "/*.h") 1489 for file in files: 1490 skip = 0 1491 for excl in self.excludes: 1492 if string.find(file, excl) != -1: 1493 skip = 1; 1494 break 1495 if skip == 0: 1496 self.headers[file] = None; 1497 self.scanHeaders() 1498 self.scanModules() 1499 1500 def modulename_file(self, file): 1501 module = os.path.basename(file) 1502 if module[-2:] == '.h': 1503 module = module[:-2] 1504 return module 1505 1506 def serialize_enum(self, output, name): 1507 id = self.idx.enums[name] 1508 output.write(" <enum name='%s' file='%s'" % (name, 1509 self.modulename_file(id.module))) 1510 if id.info != None: 1511 info = id.info 1512 if info[0] != None and info[0] != '': 1513 try: 1514 val = eval(info[0]) 1515 except: 1516 val = info[0] 1517 output.write(" value='%s'" % (val)); 1518 if info[2] != None and info[2] != '': 1519 output.write(" type='%s'" % info[2]); 1520 if info[1] != None and info[1] != '': 1521 output.write(" info='%s'" % escape(info[1])); 1522 output.write("/>\n") 1523 1524 def serialize_macro(self, output, name): 1525 id = self.idx.macros[name] 1526 output.write(" <macro name='%s' file='%s'>\n" % (name, 1527 self.modulename_file(id.module))) 1528 if id.info != None: 1529 try: 1530 (args, desc) = id.info 1531 if desc != None and desc != "": 1532 output.write(" <info>%s</info>\n" % (escape(desc))) 1533 self.indexString(name, desc) 1534 for arg in args: 1535 (name, desc) = arg 1536 if desc != None and desc != "": 1537 output.write(" <arg name='%s' info='%s'/>\n" % ( 1538 name, escape(desc))) 1539 self.indexString(name, desc) 1540 else: 1541 output.write(" <arg name='%s'/>\n" % (name)) 1542 except: 1543 pass 1544 output.write(" </macro>\n") 1545 1546 def serialize_typedef(self, output, name): 1547 id = self.idx.typedefs[name] 1548 if id.info[0:7] == 'struct ': 1549 output.write(" <struct name='%s' file='%s' type='%s'" % ( 1550 name, self.modulename_file(id.module), id.info)) 1551 name = id.info[7:] 1552 if self.idx.structs.has_key(name) and ( \ 1553 type(self.idx.structs[name].info) == type(()) or 1554 type(self.idx.structs[name].info) == type([])): 1555 output.write(">\n"); 1556 try: 1557 for field in self.idx.structs[name].info: 1558 desc = field[2] 1559 self.indexString(name, desc) 1560 if desc == None: 1561 desc = '' 1562 else: 1563 desc = escape(desc) 1564 output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc)) 1565 except: 1566 print "Failed to serialize struct %s" % (name) 1567 output.write(" </struct>\n") 1568 else: 1569 output.write("/>\n"); 1570 else : 1571 output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % ( 1572 name, self.modulename_file(id.module), id.info)) 1573 1574 def serialize_variable(self, output, name): 1575 id = self.idx.variables[name] 1576 if id.info != None: 1577 output.write(" <variable name='%s' file='%s' type='%s'/>\n" % ( 1578 name, self.modulename_file(id.module), id.info)) 1579 else: 1580 output.write(" <variable name='%s' file='%s'/>\n" % ( 1581 name, self.modulename_file(id.module))) 1582 1583 def serialize_function(self, output, name): 1584 id = self.idx.functions[name] 1585 output.write(" <%s name='%s' file='%s'>\n" % (id.type, name, 1586 self.modulename_file(id.module))) 1587 try: 1588 (ret, params, desc) = id.info 1589 output.write(" <info>%s</info>\n" % (escape(desc))) 1590 self.indexString(name, desc) 1591 if ret[0] != None: 1592 if ret[0] == "void": 1593 output.write(" <return type='void'/>\n") 1594 else: 1595 output.write(" <return type='%s' info='%s'/>\n" % ( 1596 ret[0], escape(ret[1]))) 1597 self.indexString(name, ret[1]) 1598 for param in params: 1599 if param[0] == 'void': 1600 continue 1601 if param[2] == None: 1602 output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0])) 1603 else: 1604 output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2]))) 1605 self.indexString(name, param[2]) 1606 except: 1607 print "Failed to save function %s info: " % name, `id.info` 1608 output.write(" </%s>\n" % (id.type)) 1609 1610 def serialize_exports(self, output, file): 1611 module = self.modulename_file(file) 1612 output.write(" <file name='%s'>\n" % (module)) 1613 dict = self.headers[file] 1614 if dict.info != None: 1615 for data in ('Summary', 'Description', 'Author'): 1616 try: 1617 output.write(" <%s>%s</%s>\n" % ( 1618 string.lower(data), 1619 escape(dict.info[data]), 1620 string.lower(data))) 1621 except: 1622 print "Header %s lacks a %s description" % (module, data) 1623 if dict.info.has_key('Description'): 1624 desc = dict.info['Description'] 1625 if string.find(desc, "DEPRECATED") != -1: 1626 output.write(" <deprecated/>\n") 1627 1628 ids = dict.macros.keys() 1629 ids.sort() 1630 for id in uniq(ids): 1631 # Macros are sometime used to masquerade other types. 1632 if dict.functions.has_key(id): 1633 continue 1634 if dict.variables.has_key(id): 1635 continue 1636 if dict.typedefs.has_key(id): 1637 continue 1638 if dict.structs.has_key(id): 1639 continue 1640 if dict.enums.has_key(id): 1641 continue 1642 output.write(" <exports symbol='%s' type='macro'/>\n" % (id)) 1643 ids = dict.enums.keys() 1644 ids.sort() 1645 for id in uniq(ids): 1646 output.write(" <exports symbol='%s' type='enum'/>\n" % (id)) 1647 ids = dict.typedefs.keys() 1648 ids.sort() 1649 for id in uniq(ids): 1650 output.write(" <exports symbol='%s' type='typedef'/>\n" % (id)) 1651 ids = dict.structs.keys() 1652 ids.sort() 1653 for id in uniq(ids): 1654 output.write(" <exports symbol='%s' type='struct'/>\n" % (id)) 1655 ids = dict.variables.keys() 1656 ids.sort() 1657 for id in uniq(ids): 1658 output.write(" <exports symbol='%s' type='variable'/>\n" % (id)) 1659 ids = dict.functions.keys() 1660 ids.sort() 1661 for id in uniq(ids): 1662 output.write(" <exports symbol='%s' type='function'/>\n" % (id)) 1663 output.write(" </file>\n") 1664 1665 def serialize_xrefs_files(self, output): 1666 headers = self.headers.keys() 1667 headers.sort() 1668 for file in headers: 1669 module = self.modulename_file(file) 1670 output.write(" <file name='%s'>\n" % (module)) 1671 dict = self.headers[file] 1672 ids = uniq(dict.functions.keys() + dict.variables.keys() + \ 1673 dict.macros.keys() + dict.typedefs.keys() + \ 1674 dict.structs.keys() + dict.enums.keys()) 1675 ids.sort() 1676 for id in ids: 1677 output.write(" <ref name='%s'/>\n" % (id)) 1678 output.write(" </file>\n") 1679 pass 1680 1681 def serialize_xrefs_functions(self, output): 1682 funcs = {} 1683 for name in self.idx.functions.keys(): 1684 id = self.idx.functions[name] 1685 try: 1686 (ret, params, desc) = id.info 1687 for param in params: 1688 if param[0] == 'void': 1689 continue 1690 if funcs.has_key(param[0]): 1691 funcs[param[0]].append(name) 1692 else: 1693 funcs[param[0]] = [name] 1694 except: 1695 pass 1696 typ = funcs.keys() 1697 typ.sort() 1698 for type in typ: 1699 if type == '' or type == 'void' or type == "int" or \ 1700 type == "char *" or type == "const char *" : 1701 continue 1702 output.write(" <type name='%s'>\n" % (type)) 1703 ids = funcs[type] 1704 ids.sort() 1705 pid = '' # not sure why we have dups, but get rid of them! 1706 for id in ids: 1707 if id != pid: 1708 output.write(" <ref name='%s'/>\n" % (id)) 1709 pid = id 1710 output.write(" </type>\n") 1711 1712 def serialize_xrefs_constructors(self, output): 1713 funcs = {} 1714 for name in self.idx.functions.keys(): 1715 id = self.idx.functions[name] 1716 try: 1717 (ret, params, desc) = id.info 1718 if ret[0] == "void": 1719 continue 1720 if funcs.has_key(ret[0]): 1721 funcs[ret[0]].append(name) 1722 else: 1723 funcs[ret[0]] = [name] 1724 except: 1725 pass 1726 typ = funcs.keys() 1727 typ.sort() 1728 for type in typ: 1729 if type == '' or type == 'void' or type == "int" or \ 1730 type == "char *" or type == "const char *" : 1731 continue 1732 output.write(" <type name='%s'>\n" % (type)) 1733 ids = funcs[type] 1734 ids.sort() 1735 for id in ids: 1736 output.write(" <ref name='%s'/>\n" % (id)) 1737 output.write(" </type>\n") 1738 1739 def serialize_xrefs_alpha(self, output): 1740 letter = None 1741 ids = self.idx.identifiers.keys() 1742 ids.sort() 1743 for id in ids: 1744 if id[0] != letter: 1745 if letter != None: 1746 output.write(" </letter>\n") 1747 letter = id[0] 1748 output.write(" <letter name='%s'>\n" % (letter)) 1749 output.write(" <ref name='%s'/>\n" % (id)) 1750 if letter != None: 1751 output.write(" </letter>\n") 1752 1753 def serialize_xrefs_references(self, output): 1754 typ = self.idx.identifiers.keys() 1755 typ.sort() 1756 for id in typ: 1757 idf = self.idx.identifiers[id] 1758 module = idf.module 1759 output.write(" <reference name='%s' href='%s'/>\n" % (id, 1760 'html/' + self.basename + '-' + 1761 self.modulename_file(module) + '.html#' + 1762 id)) 1763 1764 def serialize_xrefs_index(self, output): 1765 index = self.xref 1766 typ = index.keys() 1767 typ.sort() 1768 letter = None 1769 count = 0 1770 chunk = 0 1771 chunks = [] 1772 for id in typ: 1773 if len(index[id]) > 30: 1774 continue 1775 if id[0] != letter: 1776 if letter == None or count > 200: 1777 if letter != None: 1778 output.write(" </letter>\n") 1779 output.write(" </chunk>\n") 1780 count = 0 1781 chunks.append(["chunk%s" % (chunk -1), first_letter, letter]) 1782 output.write(" <chunk name='chunk%s'>\n" % (chunk)) 1783 first_letter = id[0] 1784 chunk = chunk + 1 1785 elif letter != None: 1786 output.write(" </letter>\n") 1787 letter = id[0] 1788 output.write(" <letter name='%s'>\n" % (letter)) 1789 output.write(" <word name='%s'>\n" % (id)) 1790 tokens = index[id]; 1791 tokens.sort() 1792 tok = None 1793 for token in tokens: 1794 if tok == token: 1795 continue 1796 tok = token 1797 output.write(" <ref name='%s'/>\n" % (token)) 1798 count = count + 1 1799 output.write(" </word>\n") 1800 if letter != None: 1801 output.write(" </letter>\n") 1802 output.write(" </chunk>\n") 1803 if count != 0: 1804 chunks.append(["chunk%s" % (chunk -1), first_letter, letter]) 1805 output.write(" <chunks>\n") 1806 for ch in chunks: 1807 output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % ( 1808 ch[0], ch[1], ch[2])) 1809 output.write(" </chunks>\n") 1810 1811 def serialize_xrefs(self, output): 1812 output.write(" <references>\n") 1813 self.serialize_xrefs_references(output) 1814 output.write(" </references>\n") 1815 output.write(" <alpha>\n") 1816 self.serialize_xrefs_alpha(output) 1817 output.write(" </alpha>\n") 1818 output.write(" <constructors>\n") 1819 self.serialize_xrefs_constructors(output) 1820 output.write(" </constructors>\n") 1821 output.write(" <functions>\n") 1822 self.serialize_xrefs_functions(output) 1823 output.write(" </functions>\n") 1824 output.write(" <files>\n") 1825 self.serialize_xrefs_files(output) 1826 output.write(" </files>\n") 1827 output.write(" <index>\n") 1828 self.serialize_xrefs_index(output) 1829 output.write(" </index>\n") 1830 1831 def serialize(self, outdir): 1832 filename = outdir + "%s-api.xml" % self.name 1833 print "Saving XML description %s" % (filename) 1834 output = open(filename, "w") 1835 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n') 1836 output.write("<api name='%s'>\n" % self.name) 1837 output.write(" <files>\n") 1838 headers = self.headers.keys() 1839 headers.sort() 1840 for file in headers: 1841 self.serialize_exports(output, file) 1842 output.write(" </files>\n") 1843 output.write(" <symbols>\n") 1844 macros = self.idx.macros.keys() 1845 macros.sort() 1846 for macro in macros: 1847 self.serialize_macro(output, macro) 1848 enums = self.idx.enums.keys() 1849 enums.sort() 1850 for enum in enums: 1851 self.serialize_enum(output, enum) 1852 typedefs = self.idx.typedefs.keys() 1853 typedefs.sort() 1854 for typedef in typedefs: 1855 self.serialize_typedef(output, typedef) 1856 variables = self.idx.variables.keys() 1857 variables.sort() 1858 for variable in variables: 1859 self.serialize_variable(output, variable) 1860 functions = self.idx.functions.keys() 1861 functions.sort() 1862 for function in functions: 1863 self.serialize_function(output, function) 1864 output.write(" </symbols>\n") 1865 output.write("</api>\n") 1866 output.close() 1867 1868 filename = outdir + "%s-refs.xml" % self.name 1869 print "Saving XML Cross References %s" % (filename) 1870 output = open(filename, "w") 1871 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n') 1872 output.write("<apirefs name='%s'>\n" % self.name) 1873 self.serialize_xrefs(output) 1874 output.write("</apirefs>\n") 1875 output.close() 1876 1877 1878def rebuild(): 1879 builder = None 1880 if glob.glob("parser.c") != [] : 1881 print "Rebuilding API description for libxml2" 1882 builder = docBuilder("libxml2", [".", "."], 1883 ["xmlwin32version.h", "tst.c"]) 1884 elif glob.glob("../parser.c") != [] : 1885 print "Rebuilding API description for libxml2" 1886 builder = docBuilder("libxml2", ["..", "../include/libxml"], 1887 ["xmlwin32version.h", "tst.c"]) 1888 elif glob.glob("../libxslt/transform.c") != [] : 1889 print "Rebuilding API description for libxslt" 1890 builder = docBuilder("libxslt", ["../libxslt"], 1891 ["win32config.h", "libxslt.h", "tst.c"]) 1892 else: 1893 print "rebuild() failed, unable to guess the module" 1894 return None 1895 builder.scan() 1896 builder.analyze() 1897 builder.serialize("./") 1898 if glob.glob("../libexslt/exslt.c") != [] : 1899 extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"]) 1900 extra.scan() 1901 extra.analyze() 1902 extra.serialize("EXSLT/") 1903 return builder 1904 1905# 1906# for debugging the parser 1907# 1908def parse(filename): 1909 parser = CParser(filename) 1910 idx = parser.parse() 1911 return idx 1912 1913if __name__ == "__main__": 1914 rebuild() 1915