1#!/usr/bin/python -u 2# 3# This is the API builder, it parses the C sources and build the 4# API formal description in XML. 5# 6# See Copyright for the status of this software. 7# 8# daniel@veillard.com 9# 10import os, sys 11import string 12import glob 13 14debug=0 15#debugsym='ignorableWhitespaceSAXFunc' 16debugsym=None 17 18# 19# C parser analysis code 20# 21ignored_files = { 22 "trio": "too many non standard macros", 23 "trio.c": "too many non standard macros", 24 "trionan.c": "too many non standard macros", 25 "triostr.c": "too many non standard macros", 26 "acconfig.h": "generated portability layer", 27 "config.h": "generated portability layer", 28 "libxml.h": "internal only", 29 "testOOM.c": "out of memory tester", 30 "testOOMlib.h": "out of memory tester", 31 "testOOMlib.c": "out of memory tester", 32 "rngparser.c": "not yet integrated", 33 "rngparser.h": "not yet integrated", 34 "elfgcchack.h": "not a normal header", 35 "testHTML.c": "test tool", 36 "testReader.c": "test tool", 37 "testSchemas.c": "test tool", 38 "testXPath.c": "test tool", 39 "testAutomata.c": "test tool", 40 "testModule.c": "test tool", 41 "testRegexp.c": "test tool", 42 "testThreads.c": "test tool", 43 "testC14N.c": "test tool", 44 "testRelax.c": "test tool", 45 "testThreadsWin32.c": "test tool", 46 "testSAX.c": "test tool", 47 "testURI.c": "test tool", 48 "testapi.c": "generated regression tests", 49 "runtest.c": "regression tests program", 50 "runsuite.c": "regression tests program", 51 "tst.c": "not part of the library", 52 "test.c": "not part of the library", 53 "testdso.c": "test for dynamid shared libraries", 54 "testrecurse.c": "test for entities recursions", 55} 56 57ignored_words = { 58 "WINAPI": (0, "Windows keyword"), 59 "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"), 60 "XMLPUBVAR": (0, "Special macro for extern vars for win32"), 61 "XSLTPUBVAR": (0, "Special macro for extern vars for win32"), 62 "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"), 63 "XMLPUBFUN": (0, "Special macro for extern funcs for win32"), 64 "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"), 65 "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"), 66 "XMLCALL": (0, "Special macro for win32 calls"), 67 "XSLTCALL": (0, "Special macro for win32 calls"), 68 "XMLCDECL": (0, "Special macro for win32 calls"), 69 "EXSLTCALL": (0, "Special macro for win32 calls"), 70 "__declspec": (3, "Windows keyword"), 71 "__stdcall": (0, "Windows keyword"), 72 "ATTRIBUTE_UNUSED": (0, "macro keyword"), 73 "LIBEXSLT_PUBLIC": (0, "macro keyword"), 74 "X_IN_Y": (5, "macro function builder"), 75} 76 77def escape(raw): 78 raw = string.replace(raw, '&', '&') 79 raw = string.replace(raw, '<', '<') 80 raw = string.replace(raw, '>', '>') 81 raw = string.replace(raw, "'", ''') 82 raw = string.replace(raw, '"', '"') 83 return raw 84 85def uniq(items): 86 d = {} 87 for item in items: 88 d[item]=1 89 return d.keys() 90 91class identifier: 92 def __init__(self, name, header=None, module=None, type=None, lineno = 0, 93 info=None, extra=None, conditionals = None): 94 self.name = name 95 self.header = header 96 self.module = module 97 self.type = type 98 self.info = info 99 self.extra = extra 100 self.lineno = lineno 101 self.static = 0 102 if conditionals == None or len(conditionals) == 0: 103 self.conditionals = None 104 else: 105 self.conditionals = conditionals[:] 106 if self.name == debugsym: 107 print "=> define %s : %s" % (debugsym, (module, type, info, 108 extra, conditionals)) 109 110 def __repr__(self): 111 r = "%s %s:" % (self.type, self.name) 112 if self.static: 113 r = r + " static" 114 if self.module != None: 115 r = r + " from %s" % (self.module) 116 if self.info != None: 117 r = r + " " + `self.info` 118 if self.extra != None: 119 r = r + " " + `self.extra` 120 if self.conditionals != None: 121 r = r + " " + `self.conditionals` 122 return r 123 124 125 def set_header(self, header): 126 self.header = header 127 def set_module(self, module): 128 self.module = module 129 def set_type(self, type): 130 self.type = type 131 def set_info(self, info): 132 self.info = info 133 def set_extra(self, extra): 134 self.extra = extra 135 def set_lineno(self, lineno): 136 self.lineno = lineno 137 def set_static(self, static): 138 self.static = static 139 def set_conditionals(self, conditionals): 140 if conditionals == None or len(conditionals) == 0: 141 self.conditionals = None 142 else: 143 self.conditionals = conditionals[:] 144 145 def get_name(self): 146 return self.name 147 def get_header(self): 148 return self.module 149 def get_module(self): 150 return self.module 151 def get_type(self): 152 return self.type 153 def get_info(self): 154 return self.info 155 def get_lineno(self): 156 return self.lineno 157 def get_extra(self): 158 return self.extra 159 def get_static(self): 160 return self.static 161 def get_conditionals(self): 162 return self.conditionals 163 164 def update(self, header, module, type = None, info = None, extra=None, 165 conditionals=None): 166 if self.name == debugsym: 167 print "=> update %s : %s" % (debugsym, (module, type, info, 168 extra, conditionals)) 169 if header != None and self.header == None: 170 self.set_header(module) 171 if module != None and (self.module == None or self.header == self.module): 172 self.set_module(module) 173 if type != None and self.type == None: 174 self.set_type(type) 175 if info != None: 176 self.set_info(info) 177 if extra != None: 178 self.set_extra(extra) 179 if conditionals != None: 180 self.set_conditionals(conditionals) 181 182class index: 183 def __init__(self, name = "noname"): 184 self.name = name 185 self.identifiers = {} 186 self.functions = {} 187 self.variables = {} 188 self.includes = {} 189 self.structs = {} 190 self.enums = {} 191 self.typedefs = {} 192 self.macros = {} 193 self.references = {} 194 self.info = {} 195 196 def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None): 197 if name[0:2] == '__': 198 return None 199 d = None 200 try: 201 d = self.identifiers[name] 202 d.update(header, module, type, lineno, info, extra, conditionals) 203 except: 204 d = identifier(name, header, module, type, lineno, info, extra, conditionals) 205 self.identifiers[name] = d 206 207 if d != None and static == 1: 208 d.set_static(1) 209 210 if d != None and name != None and type != None: 211 self.references[name] = d 212 213 if name == debugsym: 214 print "New ref: %s" % (d) 215 216 return d 217 218 def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None): 219 if name[0:2] == '__': 220 return None 221 d = None 222 try: 223 d = self.identifiers[name] 224 d.update(header, module, type, lineno, info, extra, conditionals) 225 except: 226 d = identifier(name, header, module, type, lineno, info, extra, conditionals) 227 self.identifiers[name] = d 228 229 if d != None and static == 1: 230 d.set_static(1) 231 232 if d != None and name != None and type != None: 233 if type == "function": 234 self.functions[name] = d 235 elif type == "functype": 236 self.functions[name] = d 237 elif type == "variable": 238 self.variables[name] = d 239 elif type == "include": 240 self.includes[name] = d 241 elif type == "struct": 242 self.structs[name] = d 243 elif type == "enum": 244 self.enums[name] = d 245 elif type == "typedef": 246 self.typedefs[name] = d 247 elif type == "macro": 248 self.macros[name] = d 249 else: 250 print "Unable to register type ", type 251 252 if name == debugsym: 253 print "New symbol: %s" % (d) 254 255 return d 256 257 def merge(self, idx): 258 for id in idx.functions.keys(): 259 # 260 # macro might be used to override functions or variables 261 # definitions 262 # 263 if self.macros.has_key(id): 264 del self.macros[id] 265 if self.functions.has_key(id): 266 print "function %s from %s redeclared in %s" % ( 267 id, self.functions[id].header, idx.functions[id].header) 268 else: 269 self.functions[id] = idx.functions[id] 270 self.identifiers[id] = idx.functions[id] 271 for id in idx.variables.keys(): 272 # 273 # macro might be used to override functions or variables 274 # definitions 275 # 276 if self.macros.has_key(id): 277 del self.macros[id] 278 if self.variables.has_key(id): 279 print "variable %s from %s redeclared in %s" % ( 280 id, self.variables[id].header, idx.variables[id].header) 281 else: 282 self.variables[id] = idx.variables[id] 283 self.identifiers[id] = idx.variables[id] 284 for id in idx.structs.keys(): 285 if self.structs.has_key(id): 286 print "struct %s from %s redeclared in %s" % ( 287 id, self.structs[id].header, idx.structs[id].header) 288 else: 289 self.structs[id] = idx.structs[id] 290 self.identifiers[id] = idx.structs[id] 291 for id in idx.typedefs.keys(): 292 if self.typedefs.has_key(id): 293 print "typedef %s from %s redeclared in %s" % ( 294 id, self.typedefs[id].header, idx.typedefs[id].header) 295 else: 296 self.typedefs[id] = idx.typedefs[id] 297 self.identifiers[id] = idx.typedefs[id] 298 for id in idx.macros.keys(): 299 # 300 # macro might be used to override functions or variables 301 # definitions 302 # 303 if self.variables.has_key(id): 304 continue 305 if self.functions.has_key(id): 306 continue 307 if self.enums.has_key(id): 308 continue 309 if self.macros.has_key(id): 310 print "macro %s from %s redeclared in %s" % ( 311 id, self.macros[id].header, idx.macros[id].header) 312 else: 313 self.macros[id] = idx.macros[id] 314 self.identifiers[id] = idx.macros[id] 315 for id in idx.enums.keys(): 316 if self.enums.has_key(id): 317 print "enum %s from %s redeclared in %s" % ( 318 id, self.enums[id].header, idx.enums[id].header) 319 else: 320 self.enums[id] = idx.enums[id] 321 self.identifiers[id] = idx.enums[id] 322 323 def merge_public(self, idx): 324 for id in idx.functions.keys(): 325 if self.functions.has_key(id): 326 # check that function condition agrees with header 327 if idx.functions[id].conditionals != \ 328 self.functions[id].conditionals: 329 print "Header condition differs from Function for %s:" \ 330 % id 331 print " H: %s" % self.functions[id].conditionals 332 print " C: %s" % idx.functions[id].conditionals 333 up = idx.functions[id] 334 self.functions[id].update(None, up.module, up.type, up.info, up.extra) 335 # else: 336 # print "Function %s from %s is not declared in headers" % ( 337 # id, idx.functions[id].module) 338 # TODO: do the same for variables. 339 340 def analyze_dict(self, type, dict): 341 count = 0 342 public = 0 343 for name in dict.keys(): 344 id = dict[name] 345 count = count + 1 346 if id.static == 0: 347 public = public + 1 348 if count != public: 349 print " %d %s , %d public" % (count, type, public) 350 elif count != 0: 351 print " %d public %s" % (count, type) 352 353 354 def analyze(self): 355 self.analyze_dict("functions", self.functions) 356 self.analyze_dict("variables", self.variables) 357 self.analyze_dict("structs", self.structs) 358 self.analyze_dict("typedefs", self.typedefs) 359 self.analyze_dict("macros", self.macros) 360 361class CLexer: 362 """A lexer for the C language, tokenize the input by reading and 363 analyzing it line by line""" 364 def __init__(self, input): 365 self.input = input 366 self.tokens = [] 367 self.line = "" 368 self.lineno = 0 369 370 def getline(self): 371 line = '' 372 while line == '': 373 line = self.input.readline() 374 if not line: 375 return None 376 self.lineno = self.lineno + 1 377 line = string.lstrip(line) 378 line = string.rstrip(line) 379 if line == '': 380 continue 381 while line[-1] == '\\': 382 line = line[:-1] 383 n = self.input.readline() 384 self.lineno = self.lineno + 1 385 n = string.lstrip(n) 386 n = string.rstrip(n) 387 if not n: 388 break 389 else: 390 line = line + n 391 return line 392 393 def getlineno(self): 394 return self.lineno 395 396 def push(self, token): 397 self.tokens.insert(0, token); 398 399 def debug(self): 400 print "Last token: ", self.last 401 print "Token queue: ", self.tokens 402 print "Line %d end: " % (self.lineno), self.line 403 404 def token(self): 405 while self.tokens == []: 406 if self.line == "": 407 line = self.getline() 408 else: 409 line = self.line 410 self.line = "" 411 if line == None: 412 return None 413 414 if line[0] == '#': 415 self.tokens = map((lambda x: ('preproc', x)), 416 string.split(line)) 417 break; 418 l = len(line) 419 if line[0] == '"' or line[0] == "'": 420 end = line[0] 421 line = line[1:] 422 found = 0 423 tok = "" 424 while found == 0: 425 i = 0 426 l = len(line) 427 while i < l: 428 if line[i] == end: 429 self.line = line[i+1:] 430 line = line[:i] 431 l = i 432 found = 1 433 break 434 if line[i] == '\\': 435 i = i + 1 436 i = i + 1 437 tok = tok + line 438 if found == 0: 439 line = self.getline() 440 if line == None: 441 return None 442 self.last = ('string', tok) 443 return self.last 444 445 if l >= 2 and line[0] == '/' and line[1] == '*': 446 line = line[2:] 447 found = 0 448 tok = "" 449 while found == 0: 450 i = 0 451 l = len(line) 452 while i < l: 453 if line[i] == '*' and i+1 < l and line[i+1] == '/': 454 self.line = line[i+2:] 455 line = line[:i-1] 456 l = i 457 found = 1 458 break 459 i = i + 1 460 if tok != "": 461 tok = tok + "\n" 462 tok = tok + line 463 if found == 0: 464 line = self.getline() 465 if line == None: 466 return None 467 self.last = ('comment', tok) 468 return self.last 469 if l >= 2 and line[0] == '/' and line[1] == '/': 470 line = line[2:] 471 self.last = ('comment', line) 472 return self.last 473 i = 0 474 while i < l: 475 if line[i] == '/' and i+1 < l and line[i+1] == '/': 476 self.line = line[i:] 477 line = line[:i] 478 break 479 if line[i] == '/' and i+1 < l and line[i+1] == '*': 480 self.line = line[i:] 481 line = line[:i] 482 break 483 if line[i] == '"' or line[i] == "'": 484 self.line = line[i:] 485 line = line[:i] 486 break 487 i = i + 1 488 l = len(line) 489 i = 0 490 while i < l: 491 if line[i] == ' ' or line[i] == '\t': 492 i = i + 1 493 continue 494 o = ord(line[i]) 495 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 496 (o >= 48 and o <= 57): 497 s = i 498 while i < l: 499 o = ord(line[i]) 500 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 501 (o >= 48 and o <= 57) or string.find( 502 " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1: 503 i = i + 1 504 else: 505 break 506 self.tokens.append(('name', line[s:i])) 507 continue 508 if string.find("(){}:;,[]", line[i]) != -1: 509# if line[i] == '(' or line[i] == ')' or line[i] == '{' or \ 510# line[i] == '}' or line[i] == ':' or line[i] == ';' or \ 511# line[i] == ',' or line[i] == '[' or line[i] == ']': 512 self.tokens.append(('sep', line[i])) 513 i = i + 1 514 continue 515 if string.find("+-*><=/%&!|.", line[i]) != -1: 516# if line[i] == '+' or line[i] == '-' or line[i] == '*' or \ 517# line[i] == '>' or line[i] == '<' or line[i] == '=' or \ 518# line[i] == '/' or line[i] == '%' or line[i] == '&' or \ 519# line[i] == '!' or line[i] == '|' or line[i] == '.': 520 if line[i] == '.' and i + 2 < l and \ 521 line[i+1] == '.' and line[i+2] == '.': 522 self.tokens.append(('name', '...')) 523 i = i + 3 524 continue 525 526 j = i + 1 527 if j < l and ( 528 string.find("+-*><=/%&!|", line[j]) != -1): 529# line[j] == '+' or line[j] == '-' or line[j] == '*' or \ 530# line[j] == '>' or line[j] == '<' or line[j] == '=' or \ 531# line[j] == '/' or line[j] == '%' or line[j] == '&' or \ 532# line[j] == '!' or line[j] == '|'): 533 self.tokens.append(('op', line[i:j+1])) 534 i = j + 1 535 else: 536 self.tokens.append(('op', line[i])) 537 i = i + 1 538 continue 539 s = i 540 while i < l: 541 o = ord(line[i]) 542 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 543 (o >= 48 and o <= 57) or ( 544 string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1): 545# line[i] != ' ' and line[i] != '\t' and 546# line[i] != '(' and line[i] != ')' and 547# line[i] != '{' and line[i] != '}' and 548# line[i] != ':' and line[i] != ';' and 549# line[i] != ',' and line[i] != '+' and 550# line[i] != '-' and line[i] != '*' and 551# line[i] != '/' and line[i] != '%' and 552# line[i] != '&' and line[i] != '!' and 553# line[i] != '|' and line[i] != '[' and 554# line[i] != ']' and line[i] != '=' and 555# line[i] != '*' and line[i] != '>' and 556# line[i] != '<'): 557 i = i + 1 558 else: 559 break 560 self.tokens.append(('name', line[s:i])) 561 562 tok = self.tokens[0] 563 self.tokens = self.tokens[1:] 564 self.last = tok 565 return tok 566 567class CParser: 568 """The C module parser""" 569 def __init__(self, filename, idx = None): 570 self.filename = filename 571 if len(filename) > 2 and filename[-2:] == '.h': 572 self.is_header = 1 573 else: 574 self.is_header = 0 575 self.input = open(filename) 576 self.lexer = CLexer(self.input) 577 if idx == None: 578 self.index = index() 579 else: 580 self.index = idx 581 self.top_comment = "" 582 self.last_comment = "" 583 self.comment = None 584 self.collect_ref = 0 585 self.no_error = 0 586 self.conditionals = [] 587 self.defines = [] 588 589 def collect_references(self): 590 self.collect_ref = 1 591 592 def stop_error(self): 593 self.no_error = 1 594 595 def start_error(self): 596 self.no_error = 0 597 598 def lineno(self): 599 return self.lexer.getlineno() 600 601 def index_add(self, name, module, static, type, info=None, extra = None): 602 if self.is_header == 1: 603 self.index.add(name, module, module, static, type, self.lineno(), 604 info, extra, self.conditionals) 605 else: 606 self.index.add(name, None, module, static, type, self.lineno(), 607 info, extra, self.conditionals) 608 609 def index_add_ref(self, name, module, static, type, info=None, 610 extra = None): 611 if self.is_header == 1: 612 self.index.add_ref(name, module, module, static, type, 613 self.lineno(), info, extra, self.conditionals) 614 else: 615 self.index.add_ref(name, None, module, static, type, self.lineno(), 616 info, extra, self.conditionals) 617 618 def warning(self, msg): 619 if self.no_error: 620 return 621 print msg 622 623 def error(self, msg, token=-1): 624 if self.no_error: 625 return 626 627 print "Parse Error: " + msg 628 if token != -1: 629 print "Got token ", token 630 self.lexer.debug() 631 sys.exit(1) 632 633 def debug(self, msg, token=-1): 634 print "Debug: " + msg 635 if token != -1: 636 print "Got token ", token 637 self.lexer.debug() 638 639 def parseTopComment(self, comment): 640 res = {} 641 lines = string.split(comment, "\n") 642 item = None 643 for line in lines: 644 while line != "" and (line[0] == ' ' or line[0] == '\t'): 645 line = line[1:] 646 while line != "" and line[0] == '*': 647 line = line[1:] 648 while line != "" and (line[0] == ' ' or line[0] == '\t'): 649 line = line[1:] 650 try: 651 (it, line) = string.split(line, ":", 1) 652 item = it 653 while line != "" and (line[0] == ' ' or line[0] == '\t'): 654 line = line[1:] 655 if res.has_key(item): 656 res[item] = res[item] + " " + line 657 else: 658 res[item] = line 659 except: 660 if item != None: 661 if res.has_key(item): 662 res[item] = res[item] + " " + line 663 else: 664 res[item] = line 665 self.index.info = res 666 667 def parseComment(self, token): 668 if self.top_comment == "": 669 self.top_comment = token[1] 670 if self.comment == None or token[1][0] == '*': 671 self.comment = token[1]; 672 else: 673 self.comment = self.comment + token[1] 674 token = self.lexer.token() 675 676 if string.find(self.comment, "DOC_DISABLE") != -1: 677 self.stop_error() 678 679 if string.find(self.comment, "DOC_ENABLE") != -1: 680 self.start_error() 681 682 return token 683 684 # 685 # Parse a comment block associate to a typedef 686 # 687 def parseTypeComment(self, name, quiet = 0): 688 if name[0:2] == '__': 689 quiet = 1 690 691 args = [] 692 desc = "" 693 694 if self.comment == None: 695 if not quiet: 696 self.warning("Missing comment for type %s" % (name)) 697 return((args, desc)) 698 if self.comment[0] != '*': 699 if not quiet: 700 self.warning("Missing * in type comment for %s" % (name)) 701 return((args, desc)) 702 lines = string.split(self.comment, '\n') 703 if lines[0] == '*': 704 del lines[0] 705 if lines[0] != "* %s:" % (name): 706 if not quiet: 707 self.warning("Misformatted type comment for %s" % (name)) 708 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 709 return((args, desc)) 710 del lines[0] 711 while len(lines) > 0 and lines[0] == '*': 712 del lines[0] 713 desc = "" 714 while len(lines) > 0: 715 l = lines[0] 716 while len(l) > 0 and l[0] == '*': 717 l = l[1:] 718 l = string.strip(l) 719 desc = desc + " " + l 720 del lines[0] 721 722 desc = string.strip(desc) 723 724 if quiet == 0: 725 if desc == "": 726 self.warning("Type comment for %s lack description of the macro" % (name)) 727 728 return(desc) 729 # 730 # Parse a comment block associate to a macro 731 # 732 def parseMacroComment(self, name, quiet = 0): 733 if name[0:2] == '__': 734 quiet = 1 735 736 args = [] 737 desc = "" 738 739 if self.comment == None: 740 if not quiet: 741 self.warning("Missing comment for macro %s" % (name)) 742 return((args, desc)) 743 if self.comment[0] != '*': 744 if not quiet: 745 self.warning("Missing * in macro comment for %s" % (name)) 746 return((args, desc)) 747 lines = string.split(self.comment, '\n') 748 if lines[0] == '*': 749 del lines[0] 750 if lines[0] != "* %s:" % (name): 751 if not quiet: 752 self.warning("Misformatted macro comment for %s" % (name)) 753 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 754 return((args, desc)) 755 del lines[0] 756 while lines[0] == '*': 757 del lines[0] 758 while len(lines) > 0 and lines[0][0:3] == '* @': 759 l = lines[0][3:] 760 try: 761 (arg, desc) = string.split(l, ':', 1) 762 desc=string.strip(desc) 763 arg=string.strip(arg) 764 except: 765 if not quiet: 766 self.warning("Misformatted macro comment for %s" % (name)) 767 self.warning(" problem with '%s'" % (lines[0])) 768 del lines[0] 769 continue 770 del lines[0] 771 l = string.strip(lines[0]) 772 while len(l) > 2 and l[0:3] != '* @': 773 while l[0] == '*': 774 l = l[1:] 775 desc = desc + ' ' + string.strip(l) 776 del lines[0] 777 if len(lines) == 0: 778 break 779 l = lines[0] 780 args.append((arg, desc)) 781 while len(lines) > 0 and lines[0] == '*': 782 del lines[0] 783 desc = "" 784 while len(lines) > 0: 785 l = lines[0] 786 while len(l) > 0 and l[0] == '*': 787 l = l[1:] 788 l = string.strip(l) 789 desc = desc + " " + l 790 del lines[0] 791 792 desc = string.strip(desc) 793 794 if quiet == 0: 795 if desc == "": 796 self.warning("Macro comment for %s lack description of the macro" % (name)) 797 798 return((args, desc)) 799 800 # 801 # Parse a comment block and merge the informations found in the 802 # parameters descriptions, finally returns a block as complete 803 # as possible 804 # 805 def mergeFunctionComment(self, name, description, quiet = 0): 806 if name == 'main': 807 quiet = 1 808 if name[0:2] == '__': 809 quiet = 1 810 811 (ret, args) = description 812 desc = "" 813 retdesc = "" 814 815 if self.comment == None: 816 if not quiet: 817 self.warning("Missing comment for function %s" % (name)) 818 return(((ret[0], retdesc), args, desc)) 819 if self.comment[0] != '*': 820 if not quiet: 821 self.warning("Missing * in function comment for %s" % (name)) 822 return(((ret[0], retdesc), args, desc)) 823 lines = string.split(self.comment, '\n') 824 if lines[0] == '*': 825 del lines[0] 826 if lines[0] != "* %s:" % (name): 827 if not quiet: 828 self.warning("Misformatted function comment for %s" % (name)) 829 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 830 return(((ret[0], retdesc), args, desc)) 831 del lines[0] 832 while lines[0] == '*': 833 del lines[0] 834 nbargs = len(args) 835 while len(lines) > 0 and lines[0][0:3] == '* @': 836 l = lines[0][3:] 837 try: 838 (arg, desc) = string.split(l, ':', 1) 839 desc=string.strip(desc) 840 arg=string.strip(arg) 841 except: 842 if not quiet: 843 self.warning("Misformatted function comment for %s" % (name)) 844 self.warning(" problem with '%s'" % (lines[0])) 845 del lines[0] 846 continue 847 del lines[0] 848 l = string.strip(lines[0]) 849 while len(l) > 2 and l[0:3] != '* @': 850 while l[0] == '*': 851 l = l[1:] 852 desc = desc + ' ' + string.strip(l) 853 del lines[0] 854 if len(lines) == 0: 855 break 856 l = lines[0] 857 i = 0 858 while i < nbargs: 859 if args[i][1] == arg: 860 args[i] = (args[i][0], arg, desc) 861 break; 862 i = i + 1 863 if i >= nbargs: 864 if not quiet: 865 self.warning("Unable to find arg %s from function comment for %s" % ( 866 arg, name)) 867 while len(lines) > 0 and lines[0] == '*': 868 del lines[0] 869 desc = "" 870 while len(lines) > 0: 871 l = lines[0] 872 while len(l) > 0 and l[0] == '*': 873 l = l[1:] 874 l = string.strip(l) 875 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return": 876 try: 877 l = string.split(l, ' ', 1)[1] 878 except: 879 l = "" 880 retdesc = string.strip(l) 881 del lines[0] 882 while len(lines) > 0: 883 l = lines[0] 884 while len(l) > 0 and l[0] == '*': 885 l = l[1:] 886 l = string.strip(l) 887 retdesc = retdesc + " " + l 888 del lines[0] 889 else: 890 desc = desc + " " + l 891 del lines[0] 892 893 retdesc = string.strip(retdesc) 894 desc = string.strip(desc) 895 896 if quiet == 0: 897 # 898 # report missing comments 899 # 900 i = 0 901 while i < nbargs: 902 if args[i][2] == None and args[i][0] != "void" and \ 903 ((args[i][1] != None) or (args[i][1] == '')): 904 self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1])) 905 i = i + 1 906 if retdesc == "" and ret[0] != "void": 907 self.warning("Function comment for %s lacks description of return value" % (name)) 908 if desc == "": 909 self.warning("Function comment for %s lacks description of the function" % (name)) 910 911 return(((ret[0], retdesc), args, desc)) 912 913 def parsePreproc(self, token): 914 if debug: 915 print "=> preproc ", token, self.lexer.tokens 916 name = token[1] 917 if name == "#include": 918 token = self.lexer.token() 919 if token == None: 920 return None 921 if token[0] == 'preproc': 922 self.index_add(token[1], self.filename, not self.is_header, 923 "include") 924 return self.lexer.token() 925 return token 926 if name == "#define": 927 token = self.lexer.token() 928 if token == None: 929 return None 930 if token[0] == 'preproc': 931 # TODO macros with arguments 932 name = token[1] 933 lst = [] 934 token = self.lexer.token() 935 while token != None and token[0] == 'preproc' and \ 936 token[1][0] != '#': 937 lst.append(token[1]) 938 token = self.lexer.token() 939 try: 940 name = string.split(name, '(') [0] 941 except: 942 pass 943 info = self.parseMacroComment(name, not self.is_header) 944 self.index_add(name, self.filename, not self.is_header, 945 "macro", info) 946 return token 947 948 # 949 # Processing of conditionals modified by Bill 1/1/05 950 # 951 # We process conditionals (i.e. tokens from #ifdef, #ifndef, 952 # #if, #else and #endif) for headers and mainline code, 953 # store the ones from the header in libxml2-api.xml, and later 954 # (in the routine merge_public) verify that the two (header and 955 # mainline code) agree. 956 # 957 # There is a small problem with processing the headers. Some of 958 # the variables are not concerned with enabling / disabling of 959 # library functions (e.g. '__XML_PARSER_H__'), and we don't want 960 # them to be included in libxml2-api.xml, or involved in 961 # the check between the header and the mainline code. To 962 # accomplish this, we ignore any conditional which doesn't include 963 # the string 'ENABLED' 964 # 965 if name == "#ifdef": 966 apstr = self.lexer.tokens[0][1] 967 try: 968 self.defines.append(apstr) 969 if string.find(apstr, 'ENABLED') != -1: 970 self.conditionals.append("defined(%s)" % apstr) 971 except: 972 pass 973 elif name == "#ifndef": 974 apstr = self.lexer.tokens[0][1] 975 try: 976 self.defines.append(apstr) 977 if string.find(apstr, 'ENABLED') != -1: 978 self.conditionals.append("!defined(%s)" % apstr) 979 except: 980 pass 981 elif name == "#if": 982 apstr = "" 983 for tok in self.lexer.tokens: 984 if apstr != "": 985 apstr = apstr + " " 986 apstr = apstr + tok[1] 987 try: 988 self.defines.append(apstr) 989 if string.find(apstr, 'ENABLED') != -1: 990 self.conditionals.append(apstr) 991 except: 992 pass 993 elif name == "#else": 994 if self.conditionals != [] and \ 995 string.find(self.defines[-1], 'ENABLED') != -1: 996 self.conditionals[-1] = "!(%s)" % self.conditionals[-1] 997 elif name == "#endif": 998 if self.conditionals != [] and \ 999 string.find(self.defines[-1], 'ENABLED') != -1: 1000 self.conditionals = self.conditionals[:-1] 1001 self.defines = self.defines[:-1] 1002 token = self.lexer.token() 1003 while token != None and token[0] == 'preproc' and \ 1004 token[1][0] != '#': 1005 token = self.lexer.token() 1006 return token 1007 1008 # 1009 # token acquisition on top of the lexer, it handle internally 1010 # preprocessor and comments since they are logically not part of 1011 # the program structure. 1012 # 1013 def token(self): 1014 global ignored_words 1015 1016 token = self.lexer.token() 1017 while token != None: 1018 if token[0] == 'comment': 1019 token = self.parseComment(token) 1020 continue 1021 elif token[0] == 'preproc': 1022 token = self.parsePreproc(token) 1023 continue 1024 elif token[0] == "name" and token[1] == "__const": 1025 token = ("name", "const") 1026 return token 1027 elif token[0] == "name" and token[1] == "__attribute": 1028 token = self.lexer.token() 1029 while token != None and token[1] != ";": 1030 token = self.lexer.token() 1031 return token 1032 elif token[0] == "name" and ignored_words.has_key(token[1]): 1033 (n, info) = ignored_words[token[1]] 1034 i = 0 1035 while i < n: 1036 token = self.lexer.token() 1037 i = i + 1 1038 token = self.lexer.token() 1039 continue 1040 else: 1041 if debug: 1042 print "=> ", token 1043 return token 1044 return None 1045 1046 # 1047 # Parse a typedef, it records the type and its name. 1048 # 1049 def parseTypedef(self, token): 1050 if token == None: 1051 return None 1052 token = self.parseType(token) 1053 if token == None: 1054 self.error("parsing typedef") 1055 return None 1056 base_type = self.type 1057 type = base_type 1058 #self.debug("end typedef type", token) 1059 while token != None: 1060 if token[0] == "name": 1061 name = token[1] 1062 signature = self.signature 1063 if signature != None: 1064 type = string.split(type, '(')[0] 1065 d = self.mergeFunctionComment(name, 1066 ((type, None), signature), 1) 1067 self.index_add(name, self.filename, not self.is_header, 1068 "functype", d) 1069 else: 1070 if base_type == "struct": 1071 self.index_add(name, self.filename, not self.is_header, 1072 "struct", type) 1073 base_type = "struct " + name 1074 else: 1075 # TODO report missing or misformatted comments 1076 info = self.parseTypeComment(name, 1) 1077 self.index_add(name, self.filename, not self.is_header, 1078 "typedef", type, info) 1079 token = self.token() 1080 else: 1081 self.error("parsing typedef: expecting a name") 1082 return token 1083 #self.debug("end typedef", token) 1084 if token != None and token[0] == 'sep' and token[1] == ',': 1085 type = base_type 1086 token = self.token() 1087 while token != None and token[0] == "op": 1088 type = type + token[1] 1089 token = self.token() 1090 elif token != None and token[0] == 'sep' and token[1] == ';': 1091 break; 1092 elif token != None and token[0] == 'name': 1093 type = base_type 1094 continue; 1095 else: 1096 self.error("parsing typedef: expecting ';'", token) 1097 return token 1098 token = self.token() 1099 return token 1100 1101 # 1102 # Parse a C code block, used for functions it parse till 1103 # the balancing } included 1104 # 1105 def parseBlock(self, token): 1106 while token != None: 1107 if token[0] == "sep" and token[1] == "{": 1108 token = self.token() 1109 token = self.parseBlock(token) 1110 elif token[0] == "sep" and token[1] == "}": 1111 self.comment = None 1112 token = self.token() 1113 return token 1114 else: 1115 if self.collect_ref == 1: 1116 oldtok = token 1117 token = self.token() 1118 if oldtok[0] == "name" and oldtok[1][0:3] == "xml": 1119 if token[0] == "sep" and token[1] == "(": 1120 self.index_add_ref(oldtok[1], self.filename, 1121 0, "function") 1122 token = self.token() 1123 elif token[0] == "name": 1124 token = self.token() 1125 if token[0] == "sep" and (token[1] == ";" or 1126 token[1] == "," or token[1] == "="): 1127 self.index_add_ref(oldtok[1], self.filename, 1128 0, "type") 1129 elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_": 1130 self.index_add_ref(oldtok[1], self.filename, 1131 0, "typedef") 1132 elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_": 1133 self.index_add_ref(oldtok[1], self.filename, 1134 0, "typedef") 1135 1136 else: 1137 token = self.token() 1138 return token 1139 1140 # 1141 # Parse a C struct definition till the balancing } 1142 # 1143 def parseStruct(self, token): 1144 fields = [] 1145 #self.debug("start parseStruct", token) 1146 while token != None: 1147 if token[0] == "sep" and token[1] == "{": 1148 token = self.token() 1149 token = self.parseTypeBlock(token) 1150 elif token[0] == "sep" and token[1] == "}": 1151 self.struct_fields = fields 1152 #self.debug("end parseStruct", token) 1153 #print fields 1154 token = self.token() 1155 return token 1156 else: 1157 base_type = self.type 1158 #self.debug("before parseType", token) 1159 token = self.parseType(token) 1160 #self.debug("after parseType", token) 1161 if token != None and token[0] == "name": 1162 fname = token[1] 1163 token = self.token() 1164 if token[0] == "sep" and token[1] == ";": 1165 self.comment = None 1166 token = self.token() 1167 fields.append((self.type, fname, self.comment)) 1168 self.comment = None 1169 else: 1170 self.error("parseStruct: expecting ;", token) 1171 elif token != None and token[0] == "sep" and token[1] == "{": 1172 token = self.token() 1173 token = self.parseTypeBlock(token) 1174 if token != None and token[0] == "name": 1175 token = self.token() 1176 if token != None and token[0] == "sep" and token[1] == ";": 1177 token = self.token() 1178 else: 1179 self.error("parseStruct: expecting ;", token) 1180 else: 1181 self.error("parseStruct: name", token) 1182 token = self.token() 1183 self.type = base_type; 1184 self.struct_fields = fields 1185 #self.debug("end parseStruct", token) 1186 #print fields 1187 return token 1188 1189 # 1190 # Parse a C enum block, parse till the balancing } 1191 # 1192 def parseEnumBlock(self, token): 1193 self.enums = [] 1194 name = None 1195 self.comment = None 1196 comment = "" 1197 value = "0" 1198 while token != None: 1199 if token[0] == "sep" and token[1] == "{": 1200 token = self.token() 1201 token = self.parseTypeBlock(token) 1202 elif token[0] == "sep" and token[1] == "}": 1203 if name != None: 1204 if self.comment != None: 1205 comment = self.comment 1206 self.comment = None 1207 self.enums.append((name, value, comment)) 1208 token = self.token() 1209 return token 1210 elif token[0] == "name": 1211 if name != None: 1212 if self.comment != None: 1213 comment = string.strip(self.comment) 1214 self.comment = None 1215 self.enums.append((name, value, comment)) 1216 name = token[1] 1217 comment = "" 1218 token = self.token() 1219 if token[0] == "op" and token[1][0] == "=": 1220 value = "" 1221 if len(token[1]) > 1: 1222 value = token[1][1:] 1223 token = self.token() 1224 while token[0] != "sep" or (token[1] != ',' and 1225 token[1] != '}'): 1226 value = value + token[1] 1227 token = self.token() 1228 else: 1229 try: 1230 value = "%d" % (int(value) + 1) 1231 except: 1232 self.warning("Failed to compute value of enum %s" % (name)) 1233 value="" 1234 if token[0] == "sep" and token[1] == ",": 1235 token = self.token() 1236 else: 1237 token = self.token() 1238 return token 1239 1240 # 1241 # Parse a C definition block, used for structs it parse till 1242 # the balancing } 1243 # 1244 def parseTypeBlock(self, token): 1245 while token != None: 1246 if token[0] == "sep" and token[1] == "{": 1247 token = self.token() 1248 token = self.parseTypeBlock(token) 1249 elif token[0] == "sep" and token[1] == "}": 1250 token = self.token() 1251 return token 1252 else: 1253 token = self.token() 1254 return token 1255 1256 # 1257 # Parse a type: the fact that the type name can either occur after 1258 # the definition or within the definition makes it a little harder 1259 # if inside, the name token is pushed back before returning 1260 # 1261 def parseType(self, token): 1262 self.type = "" 1263 self.struct_fields = [] 1264 self.signature = None 1265 if token == None: 1266 return token 1267 1268 while token[0] == "name" and ( 1269 token[1] == "const" or \ 1270 token[1] == "unsigned" or \ 1271 token[1] == "signed"): 1272 if self.type == "": 1273 self.type = token[1] 1274 else: 1275 self.type = self.type + " " + token[1] 1276 token = self.token() 1277 1278 if token[0] == "name" and (token[1] == "long" or token[1] == "short"): 1279 if self.type == "": 1280 self.type = token[1] 1281 else: 1282 self.type = self.type + " " + token[1] 1283 if token[0] == "name" and token[1] == "int": 1284 if self.type == "": 1285 self.type = tmp[1] 1286 else: 1287 self.type = self.type + " " + tmp[1] 1288 1289 elif token[0] == "name" and token[1] == "struct": 1290 if self.type == "": 1291 self.type = token[1] 1292 else: 1293 self.type = self.type + " " + token[1] 1294 token = self.token() 1295 nametok = None 1296 if token[0] == "name": 1297 nametok = token 1298 token = self.token() 1299 if token != None and token[0] == "sep" and token[1] == "{": 1300 token = self.token() 1301 token = self.parseStruct(token) 1302 elif token != None and token[0] == "op" and token[1] == "*": 1303 self.type = self.type + " " + nametok[1] + " *" 1304 token = self.token() 1305 while token != None and token[0] == "op" and token[1] == "*": 1306 self.type = self.type + " *" 1307 token = self.token() 1308 if token[0] == "name": 1309 nametok = token 1310 token = self.token() 1311 else: 1312 self.error("struct : expecting name", token) 1313 return token 1314 elif token != None and token[0] == "name" and nametok != None: 1315 self.type = self.type + " " + nametok[1] 1316 return token 1317 1318 if nametok != None: 1319 self.lexer.push(token) 1320 token = nametok 1321 return token 1322 1323 elif token[0] == "name" and token[1] == "enum": 1324 if self.type == "": 1325 self.type = token[1] 1326 else: 1327 self.type = self.type + " " + token[1] 1328 self.enums = [] 1329 token = self.token() 1330 if token != None and token[0] == "sep" and token[1] == "{": 1331 token = self.token() 1332 token = self.parseEnumBlock(token) 1333 else: 1334 self.error("parsing enum: expecting '{'", token) 1335 enum_type = None 1336 if token != None and token[0] != "name": 1337 self.lexer.push(token) 1338 token = ("name", "enum") 1339 else: 1340 enum_type = token[1] 1341 for enum in self.enums: 1342 self.index_add(enum[0], self.filename, 1343 not self.is_header, "enum", 1344 (enum[1], enum[2], enum_type)) 1345 return token 1346 1347 elif token[0] == "name": 1348 if self.type == "": 1349 self.type = token[1] 1350 else: 1351 self.type = self.type + " " + token[1] 1352 else: 1353 self.error("parsing type %s: expecting a name" % (self.type), 1354 token) 1355 return token 1356 token = self.token() 1357 while token != None and (token[0] == "op" or 1358 token[0] == "name" and token[1] == "const"): 1359 self.type = self.type + " " + token[1] 1360 token = self.token() 1361 1362 # 1363 # if there is a parenthesis here, this means a function type 1364 # 1365 if token != None and token[0] == "sep" and token[1] == '(': 1366 self.type = self.type + token[1] 1367 token = self.token() 1368 while token != None and token[0] == "op" and token[1] == '*': 1369 self.type = self.type + token[1] 1370 token = self.token() 1371 if token == None or token[0] != "name" : 1372 self.error("parsing function type, name expected", token); 1373 return token 1374 self.type = self.type + token[1] 1375 nametok = token 1376 token = self.token() 1377 if token != None and token[0] == "sep" and token[1] == ')': 1378 self.type = self.type + token[1] 1379 token = self.token() 1380 if token != None and token[0] == "sep" and token[1] == '(': 1381 token = self.token() 1382 type = self.type; 1383 token = self.parseSignature(token); 1384 self.type = type; 1385 else: 1386 self.error("parsing function type, '(' expected", token); 1387 return token 1388 else: 1389 self.error("parsing function type, ')' expected", token); 1390 return token 1391 self.lexer.push(token) 1392 token = nametok 1393 return token 1394 1395 # 1396 # do some lookahead for arrays 1397 # 1398 if token != None and token[0] == "name": 1399 nametok = token 1400 token = self.token() 1401 if token != None and token[0] == "sep" and token[1] == '[': 1402 self.type = self.type + nametok[1] 1403 while token != None and token[0] == "sep" and token[1] == '[': 1404 self.type = self.type + token[1] 1405 token = self.token() 1406 while token != None and token[0] != 'sep' and \ 1407 token[1] != ']' and token[1] != ';': 1408 self.type = self.type + token[1] 1409 token = self.token() 1410 if token != None and token[0] == 'sep' and token[1] == ']': 1411 self.type = self.type + token[1] 1412 token = self.token() 1413 else: 1414 self.error("parsing array type, ']' expected", token); 1415 return token 1416 elif token != None and token[0] == "sep" and token[1] == ':': 1417 # remove :12 in case it's a limited int size 1418 token = self.token() 1419 token = self.token() 1420 self.lexer.push(token) 1421 token = nametok 1422 1423 return token 1424 1425 # 1426 # Parse a signature: '(' has been parsed and we scan the type definition 1427 # up to the ')' included 1428 def parseSignature(self, token): 1429 signature = [] 1430 if token != None and token[0] == "sep" and token[1] == ')': 1431 self.signature = [] 1432 token = self.token() 1433 return token 1434 while token != None: 1435 token = self.parseType(token) 1436 if token != None and token[0] == "name": 1437 signature.append((self.type, token[1], None)) 1438 token = self.token() 1439 elif token != None and token[0] == "sep" and token[1] == ',': 1440 token = self.token() 1441 continue 1442 elif token != None and token[0] == "sep" and token[1] == ')': 1443 # only the type was provided 1444 if self.type == "...": 1445 signature.append((self.type, "...", None)) 1446 else: 1447 signature.append((self.type, None, None)) 1448 if token != None and token[0] == "sep": 1449 if token[1] == ',': 1450 token = self.token() 1451 continue 1452 elif token[1] == ')': 1453 token = self.token() 1454 break 1455 self.signature = signature 1456 return token 1457 1458 # 1459 # Parse a global definition, be it a type, variable or function 1460 # the extern "C" blocks are a bit nasty and require it to recurse. 1461 # 1462 def parseGlobal(self, token): 1463 static = 0 1464 if token[1] == 'extern': 1465 token = self.token() 1466 if token == None: 1467 return token 1468 if token[0] == 'string': 1469 if token[1] == 'C': 1470 token = self.token() 1471 if token == None: 1472 return token 1473 if token[0] == 'sep' and token[1] == "{": 1474 token = self.token() 1475# print 'Entering extern "C line ', self.lineno() 1476 while token != None and (token[0] != 'sep' or 1477 token[1] != "}"): 1478 if token[0] == 'name': 1479 token = self.parseGlobal(token) 1480 else: 1481 self.error( 1482 "token %s %s unexpected at the top level" % ( 1483 token[0], token[1])) 1484 token = self.parseGlobal(token) 1485# print 'Exiting extern "C" line', self.lineno() 1486 token = self.token() 1487 return token 1488 else: 1489 return token 1490 elif token[1] == 'static': 1491 static = 1 1492 token = self.token() 1493 if token == None or token[0] != 'name': 1494 return token 1495 1496 if token[1] == 'typedef': 1497 token = self.token() 1498 return self.parseTypedef(token) 1499 else: 1500 token = self.parseType(token) 1501 type_orig = self.type 1502 if token == None or token[0] != "name": 1503 return token 1504 type = type_orig 1505 self.name = token[1] 1506 token = self.token() 1507 while token != None and (token[0] == "sep" or token[0] == "op"): 1508 if token[0] == "sep": 1509 if token[1] == "[": 1510 type = type + token[1] 1511 token = self.token() 1512 while token != None and (token[0] != "sep" or \ 1513 token[1] != ";"): 1514 type = type + token[1] 1515 token = self.token() 1516 1517 if token != None and token[0] == "op" and token[1] == "=": 1518 # 1519 # Skip the initialization of the variable 1520 # 1521 token = self.token() 1522 if token[0] == 'sep' and token[1] == '{': 1523 token = self.token() 1524 token = self.parseBlock(token) 1525 else: 1526 self.comment = None 1527 while token != None and (token[0] != "sep" or \ 1528 (token[1] != ';' and token[1] != ',')): 1529 token = self.token() 1530 self.comment = None 1531 if token == None or token[0] != "sep" or (token[1] != ';' and 1532 token[1] != ','): 1533 self.error("missing ';' or ',' after value") 1534 1535 if token != None and token[0] == "sep": 1536 if token[1] == ";": 1537 self.comment = None 1538 token = self.token() 1539 if type == "struct": 1540 self.index_add(self.name, self.filename, 1541 not self.is_header, "struct", self.struct_fields) 1542 else: 1543 self.index_add(self.name, self.filename, 1544 not self.is_header, "variable", type) 1545 break 1546 elif token[1] == "(": 1547 token = self.token() 1548 token = self.parseSignature(token) 1549 if token == None: 1550 return None 1551 if token[0] == "sep" and token[1] == ";": 1552 d = self.mergeFunctionComment(self.name, 1553 ((type, None), self.signature), 1) 1554 self.index_add(self.name, self.filename, static, 1555 "function", d) 1556 token = self.token() 1557 elif token[0] == "sep" and token[1] == "{": 1558 d = self.mergeFunctionComment(self.name, 1559 ((type, None), self.signature), static) 1560 self.index_add(self.name, self.filename, static, 1561 "function", d) 1562 token = self.token() 1563 token = self.parseBlock(token); 1564 elif token[1] == ',': 1565 self.comment = None 1566 self.index_add(self.name, self.filename, static, 1567 "variable", type) 1568 type = type_orig 1569 token = self.token() 1570 while token != None and token[0] == "sep": 1571 type = type + token[1] 1572 token = self.token() 1573 if token != None and token[0] == "name": 1574 self.name = token[1] 1575 token = self.token() 1576 else: 1577 break 1578 1579 return token 1580 1581 def parse(self): 1582 self.warning("Parsing %s" % (self.filename)) 1583 token = self.token() 1584 while token != None: 1585 if token[0] == 'name': 1586 token = self.parseGlobal(token) 1587 else: 1588 self.error("token %s %s unexpected at the top level" % ( 1589 token[0], token[1])) 1590 token = self.parseGlobal(token) 1591 return 1592 self.parseTopComment(self.top_comment) 1593 return self.index 1594 1595 1596class docBuilder: 1597 """A documentation builder""" 1598 def __init__(self, name, directories=['.'], excludes=[]): 1599 self.name = name 1600 self.directories = directories 1601 self.excludes = excludes + ignored_files.keys() 1602 self.modules = {} 1603 self.headers = {} 1604 self.idx = index() 1605 self.xref = {} 1606 self.index = {} 1607 if name == 'libxml2': 1608 self.basename = 'libxml' 1609 else: 1610 self.basename = name 1611 1612 def indexString(self, id, str): 1613 if str == None: 1614 return 1615 str = string.replace(str, "'", ' ') 1616 str = string.replace(str, '"', ' ') 1617 str = string.replace(str, "/", ' ') 1618 str = string.replace(str, '*', ' ') 1619 str = string.replace(str, "[", ' ') 1620 str = string.replace(str, "]", ' ') 1621 str = string.replace(str, "(", ' ') 1622 str = string.replace(str, ")", ' ') 1623 str = string.replace(str, "<", ' ') 1624 str = string.replace(str, '>', ' ') 1625 str = string.replace(str, "&", ' ') 1626 str = string.replace(str, '#', ' ') 1627 str = string.replace(str, ",", ' ') 1628 str = string.replace(str, '.', ' ') 1629 str = string.replace(str, ';', ' ') 1630 tokens = string.split(str) 1631 for token in tokens: 1632 try: 1633 c = token[0] 1634 if string.find(string.letters, c) < 0: 1635 pass 1636 elif len(token) < 3: 1637 pass 1638 else: 1639 lower = string.lower(token) 1640 # TODO: generalize this a bit 1641 if lower == 'and' or lower == 'the': 1642 pass 1643 elif self.xref.has_key(token): 1644 self.xref[token].append(id) 1645 else: 1646 self.xref[token] = [id] 1647 except: 1648 pass 1649 1650 def analyze(self): 1651 print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys())) 1652 self.idx.analyze() 1653 1654 def scanHeaders(self): 1655 for header in self.headers.keys(): 1656 parser = CParser(header) 1657 idx = parser.parse() 1658 self.headers[header] = idx; 1659 self.idx.merge(idx) 1660 1661 def scanModules(self): 1662 for module in self.modules.keys(): 1663 parser = CParser(module) 1664 idx = parser.parse() 1665 # idx.analyze() 1666 self.modules[module] = idx 1667 self.idx.merge_public(idx) 1668 1669 def scan(self): 1670 for directory in self.directories: 1671 files = glob.glob(directory + "/*.c") 1672 for file in files: 1673 skip = 0 1674 for excl in self.excludes: 1675 if string.find(file, excl) != -1: 1676 skip = 1; 1677 break 1678 if skip == 0: 1679 self.modules[file] = None; 1680 files = glob.glob(directory + "/*.h") 1681 for file in files: 1682 skip = 0 1683 for excl in self.excludes: 1684 if string.find(file, excl) != -1: 1685 skip = 1; 1686 break 1687 if skip == 0: 1688 self.headers[file] = None; 1689 self.scanHeaders() 1690 self.scanModules() 1691 1692 def modulename_file(self, file): 1693 module = os.path.basename(file) 1694 if module[-2:] == '.h': 1695 module = module[:-2] 1696 elif module[-2:] == '.c': 1697 module = module[:-2] 1698 return module 1699 1700 def serialize_enum(self, output, name): 1701 id = self.idx.enums[name] 1702 output.write(" <enum name='%s' file='%s'" % (name, 1703 self.modulename_file(id.header))) 1704 if id.info != None: 1705 info = id.info 1706 if info[0] != None and info[0] != '': 1707 try: 1708 val = eval(info[0]) 1709 except: 1710 val = info[0] 1711 output.write(" value='%s'" % (val)); 1712 if info[2] != None and info[2] != '': 1713 output.write(" type='%s'" % info[2]); 1714 if info[1] != None and info[1] != '': 1715 output.write(" info='%s'" % escape(info[1])); 1716 output.write("/>\n") 1717 1718 def serialize_macro(self, output, name): 1719 id = self.idx.macros[name] 1720 output.write(" <macro name='%s' file='%s'>\n" % (name, 1721 self.modulename_file(id.header))) 1722 if id.info != None: 1723 try: 1724 (args, desc) = id.info 1725 if desc != None and desc != "": 1726 output.write(" <info>%s</info>\n" % (escape(desc))) 1727 self.indexString(name, desc) 1728 for arg in args: 1729 (name, desc) = arg 1730 if desc != None and desc != "": 1731 output.write(" <arg name='%s' info='%s'/>\n" % ( 1732 name, escape(desc))) 1733 self.indexString(name, desc) 1734 else: 1735 output.write(" <arg name='%s'/>\n" % (name)) 1736 except: 1737 pass 1738 output.write(" </macro>\n") 1739 1740 def serialize_typedef(self, output, name): 1741 id = self.idx.typedefs[name] 1742 if id.info[0:7] == 'struct ': 1743 output.write(" <struct name='%s' file='%s' type='%s'" % ( 1744 name, self.modulename_file(id.header), id.info)) 1745 name = id.info[7:] 1746 if self.idx.structs.has_key(name) and ( \ 1747 type(self.idx.structs[name].info) == type(()) or 1748 type(self.idx.structs[name].info) == type([])): 1749 output.write(">\n"); 1750 try: 1751 for field in self.idx.structs[name].info: 1752 desc = field[2] 1753 self.indexString(name, desc) 1754 if desc == None: 1755 desc = '' 1756 else: 1757 desc = escape(desc) 1758 output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc)) 1759 except: 1760 print "Failed to serialize struct %s" % (name) 1761 output.write(" </struct>\n") 1762 else: 1763 output.write("/>\n"); 1764 else : 1765 output.write(" <typedef name='%s' file='%s' type='%s'" % ( 1766 name, self.modulename_file(id.header), id.info)) 1767 try: 1768 desc = id.extra 1769 if desc != None and desc != "": 1770 output.write(">\n <info>%s</info>\n" % (escape(desc))) 1771 output.write(" </typedef>\n") 1772 else: 1773 output.write("/>\n") 1774 except: 1775 output.write("/>\n") 1776 1777 def serialize_variable(self, output, name): 1778 id = self.idx.variables[name] 1779 if id.info != None: 1780 output.write(" <variable name='%s' file='%s' type='%s'/>\n" % ( 1781 name, self.modulename_file(id.header), id.info)) 1782 else: 1783 output.write(" <variable name='%s' file='%s'/>\n" % ( 1784 name, self.modulename_file(id.header))) 1785 1786 def serialize_function(self, output, name): 1787 id = self.idx.functions[name] 1788 if name == debugsym: 1789 print "=>", id 1790 1791 output.write(" <%s name='%s' file='%s' module='%s'>\n" % (id.type, 1792 name, self.modulename_file(id.header), 1793 self.modulename_file(id.module))) 1794 # 1795 # Processing of conditionals modified by Bill 1/1/05 1796 # 1797 if id.conditionals != None: 1798 apstr = "" 1799 for cond in id.conditionals: 1800 if apstr != "": 1801 apstr = apstr + " && " 1802 apstr = apstr + cond 1803 output.write(" <cond>%s</cond>\n"% (apstr)); 1804 try: 1805 (ret, params, desc) = id.info 1806 if (desc == None or desc == '') and \ 1807 name[0:9] != "xmlThrDef" and name != "xmlDllMain": 1808 print "%s %s from %s has no description" % (id.type, name, 1809 self.modulename_file(id.module)) 1810 1811 output.write(" <info>%s</info>\n" % (escape(desc))) 1812 self.indexString(name, desc) 1813 if ret[0] != None: 1814 if ret[0] == "void": 1815 output.write(" <return type='void'/>\n") 1816 else: 1817 output.write(" <return type='%s' info='%s'/>\n" % ( 1818 ret[0], escape(ret[1]))) 1819 self.indexString(name, ret[1]) 1820 for param in params: 1821 if param[0] == 'void': 1822 continue 1823 if param[2] == None: 1824 output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0])) 1825 else: 1826 output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2]))) 1827 self.indexString(name, param[2]) 1828 except: 1829 print "Failed to save function %s info: " % name, `id.info` 1830 output.write(" </%s>\n" % (id.type)) 1831 1832 def serialize_exports(self, output, file): 1833 module = self.modulename_file(file) 1834 output.write(" <file name='%s'>\n" % (module)) 1835 dict = self.headers[file] 1836 if dict.info != None: 1837 for data in ('Summary', 'Description', 'Author'): 1838 try: 1839 output.write(" <%s>%s</%s>\n" % ( 1840 string.lower(data), 1841 escape(dict.info[data]), 1842 string.lower(data))) 1843 except: 1844 print "Header %s lacks a %s description" % (module, data) 1845 if dict.info.has_key('Description'): 1846 desc = dict.info['Description'] 1847 if string.find(desc, "DEPRECATED") != -1: 1848 output.write(" <deprecated/>\n") 1849 1850 ids = dict.macros.keys() 1851 ids.sort() 1852 for id in uniq(ids): 1853 # Macros are sometime used to masquerade other types. 1854 if dict.functions.has_key(id): 1855 continue 1856 if dict.variables.has_key(id): 1857 continue 1858 if dict.typedefs.has_key(id): 1859 continue 1860 if dict.structs.has_key(id): 1861 continue 1862 if dict.enums.has_key(id): 1863 continue 1864 output.write(" <exports symbol='%s' type='macro'/>\n" % (id)) 1865 ids = dict.enums.keys() 1866 ids.sort() 1867 for id in uniq(ids): 1868 output.write(" <exports symbol='%s' type='enum'/>\n" % (id)) 1869 ids = dict.typedefs.keys() 1870 ids.sort() 1871 for id in uniq(ids): 1872 output.write(" <exports symbol='%s' type='typedef'/>\n" % (id)) 1873 ids = dict.structs.keys() 1874 ids.sort() 1875 for id in uniq(ids): 1876 output.write(" <exports symbol='%s' type='struct'/>\n" % (id)) 1877 ids = dict.variables.keys() 1878 ids.sort() 1879 for id in uniq(ids): 1880 output.write(" <exports symbol='%s' type='variable'/>\n" % (id)) 1881 ids = dict.functions.keys() 1882 ids.sort() 1883 for id in uniq(ids): 1884 output.write(" <exports symbol='%s' type='function'/>\n" % (id)) 1885 output.write(" </file>\n") 1886 1887 def serialize_xrefs_files(self, output): 1888 headers = self.headers.keys() 1889 headers.sort() 1890 for file in headers: 1891 module = self.modulename_file(file) 1892 output.write(" <file name='%s'>\n" % (module)) 1893 dict = self.headers[file] 1894 ids = uniq(dict.functions.keys() + dict.variables.keys() + \ 1895 dict.macros.keys() + dict.typedefs.keys() + \ 1896 dict.structs.keys() + dict.enums.keys()) 1897 ids.sort() 1898 for id in ids: 1899 output.write(" <ref name='%s'/>\n" % (id)) 1900 output.write(" </file>\n") 1901 pass 1902 1903 def serialize_xrefs_functions(self, output): 1904 funcs = {} 1905 for name in self.idx.functions.keys(): 1906 id = self.idx.functions[name] 1907 try: 1908 (ret, params, desc) = id.info 1909 for param in params: 1910 if param[0] == 'void': 1911 continue 1912 if funcs.has_key(param[0]): 1913 funcs[param[0]].append(name) 1914 else: 1915 funcs[param[0]] = [name] 1916 except: 1917 pass 1918 typ = funcs.keys() 1919 typ.sort() 1920 for type in typ: 1921 if type == '' or type == 'void' or type == "int" or \ 1922 type == "char *" or type == "const char *" : 1923 continue 1924 output.write(" <type name='%s'>\n" % (type)) 1925 ids = funcs[type] 1926 ids.sort() 1927 pid = '' # not sure why we have dups, but get rid of them! 1928 for id in ids: 1929 if id != pid: 1930 output.write(" <ref name='%s'/>\n" % (id)) 1931 pid = id 1932 output.write(" </type>\n") 1933 1934 def serialize_xrefs_constructors(self, output): 1935 funcs = {} 1936 for name in self.idx.functions.keys(): 1937 id = self.idx.functions[name] 1938 try: 1939 (ret, params, desc) = id.info 1940 if ret[0] == "void": 1941 continue 1942 if funcs.has_key(ret[0]): 1943 funcs[ret[0]].append(name) 1944 else: 1945 funcs[ret[0]] = [name] 1946 except: 1947 pass 1948 typ = funcs.keys() 1949 typ.sort() 1950 for type in typ: 1951 if type == '' or type == 'void' or type == "int" or \ 1952 type == "char *" or type == "const char *" : 1953 continue 1954 output.write(" <type name='%s'>\n" % (type)) 1955 ids = funcs[type] 1956 ids.sort() 1957 for id in ids: 1958 output.write(" <ref name='%s'/>\n" % (id)) 1959 output.write(" </type>\n") 1960 1961 def serialize_xrefs_alpha(self, output): 1962 letter = None 1963 ids = self.idx.identifiers.keys() 1964 ids.sort() 1965 for id in ids: 1966 if id[0] != letter: 1967 if letter != None: 1968 output.write(" </letter>\n") 1969 letter = id[0] 1970 output.write(" <letter name='%s'>\n" % (letter)) 1971 output.write(" <ref name='%s'/>\n" % (id)) 1972 if letter != None: 1973 output.write(" </letter>\n") 1974 1975 def serialize_xrefs_references(self, output): 1976 typ = self.idx.identifiers.keys() 1977 typ.sort() 1978 for id in typ: 1979 idf = self.idx.identifiers[id] 1980 module = idf.header 1981 output.write(" <reference name='%s' href='%s'/>\n" % (id, 1982 'html/' + self.basename + '-' + 1983 self.modulename_file(module) + '.html#' + 1984 id)) 1985 1986 def serialize_xrefs_index(self, output): 1987 index = self.xref 1988 typ = index.keys() 1989 typ.sort() 1990 letter = None 1991 count = 0 1992 chunk = 0 1993 chunks = [] 1994 for id in typ: 1995 if len(index[id]) > 30: 1996 continue 1997 if id[0] != letter: 1998 if letter == None or count > 200: 1999 if letter != None: 2000 output.write(" </letter>\n") 2001 output.write(" </chunk>\n") 2002 count = 0 2003 chunks.append(["chunk%s" % (chunk -1), first_letter, letter]) 2004 output.write(" <chunk name='chunk%s'>\n" % (chunk)) 2005 first_letter = id[0] 2006 chunk = chunk + 1 2007 elif letter != None: 2008 output.write(" </letter>\n") 2009 letter = id[0] 2010 output.write(" <letter name='%s'>\n" % (letter)) 2011 output.write(" <word name='%s'>\n" % (id)) 2012 tokens = index[id]; 2013 tokens.sort() 2014 tok = None 2015 for token in tokens: 2016 if tok == token: 2017 continue 2018 tok = token 2019 output.write(" <ref name='%s'/>\n" % (token)) 2020 count = count + 1 2021 output.write(" </word>\n") 2022 if letter != None: 2023 output.write(" </letter>\n") 2024 output.write(" </chunk>\n") 2025 if count != 0: 2026 chunks.append(["chunk%s" % (chunk -1), first_letter, letter]) 2027 output.write(" <chunks>\n") 2028 for ch in chunks: 2029 output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % ( 2030 ch[0], ch[1], ch[2])) 2031 output.write(" </chunks>\n") 2032 2033 def serialize_xrefs(self, output): 2034 output.write(" <references>\n") 2035 self.serialize_xrefs_references(output) 2036 output.write(" </references>\n") 2037 output.write(" <alpha>\n") 2038 self.serialize_xrefs_alpha(output) 2039 output.write(" </alpha>\n") 2040 output.write(" <constructors>\n") 2041 self.serialize_xrefs_constructors(output) 2042 output.write(" </constructors>\n") 2043 output.write(" <functions>\n") 2044 self.serialize_xrefs_functions(output) 2045 output.write(" </functions>\n") 2046 output.write(" <files>\n") 2047 self.serialize_xrefs_files(output) 2048 output.write(" </files>\n") 2049 output.write(" <index>\n") 2050 self.serialize_xrefs_index(output) 2051 output.write(" </index>\n") 2052 2053 def serialize(self): 2054 filename = "%s-api.xml" % self.name 2055 print "Saving XML description %s" % (filename) 2056 output = open(filename, "w") 2057 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n') 2058 output.write("<api name='%s'>\n" % self.name) 2059 output.write(" <files>\n") 2060 headers = self.headers.keys() 2061 headers.sort() 2062 for file in headers: 2063 self.serialize_exports(output, file) 2064 output.write(" </files>\n") 2065 output.write(" <symbols>\n") 2066 macros = self.idx.macros.keys() 2067 macros.sort() 2068 for macro in macros: 2069 self.serialize_macro(output, macro) 2070 enums = self.idx.enums.keys() 2071 enums.sort() 2072 for enum in enums: 2073 self.serialize_enum(output, enum) 2074 typedefs = self.idx.typedefs.keys() 2075 typedefs.sort() 2076 for typedef in typedefs: 2077 self.serialize_typedef(output, typedef) 2078 variables = self.idx.variables.keys() 2079 variables.sort() 2080 for variable in variables: 2081 self.serialize_variable(output, variable) 2082 functions = self.idx.functions.keys() 2083 functions.sort() 2084 for function in functions: 2085 self.serialize_function(output, function) 2086 output.write(" </symbols>\n") 2087 output.write("</api>\n") 2088 output.close() 2089 2090 filename = "%s-refs.xml" % self.name 2091 print "Saving XML Cross References %s" % (filename) 2092 output = open(filename, "w") 2093 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n') 2094 output.write("<apirefs name='%s'>\n" % self.name) 2095 self.serialize_xrefs(output) 2096 output.write("</apirefs>\n") 2097 output.close() 2098 2099 2100def rebuild(): 2101 builder = None 2102 if glob.glob("parser.c") != [] : 2103 print "Rebuilding API description for libxml2" 2104 builder = docBuilder("libxml2", [".", "."], 2105 ["xmlwin32version.h", "tst.c"]) 2106 elif glob.glob("../parser.c") != [] : 2107 print "Rebuilding API description for libxml2" 2108 builder = docBuilder("libxml2", ["..", "../include/libxml"], 2109 ["xmlwin32version.h", "tst.c"]) 2110 elif glob.glob("../libxslt/transform.c") != [] : 2111 print "Rebuilding API description for libxslt" 2112 builder = docBuilder("libxslt", ["../libxslt"], 2113 ["win32config.h", "libxslt.h", "tst.c"]) 2114 else: 2115 print "rebuild() failed, unable to guess the module" 2116 return None 2117 builder.scan() 2118 builder.analyze() 2119 builder.serialize() 2120 if glob.glob("../libexslt/exslt.c") != [] : 2121 extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"]) 2122 extra.scan() 2123 extra.analyze() 2124 extra.serialize() 2125 return builder 2126 2127# 2128# for debugging the parser 2129# 2130def parse(filename): 2131 parser = CParser(filename) 2132 idx = parser.parse() 2133 return idx 2134 2135if __name__ == "__main__": 2136 if len(sys.argv) > 1: 2137 debug = 1 2138 parse(sys.argv[1]) 2139 else: 2140 rebuild() 2141