Scanner.java revision 608:7e06bf1dcb09
1/* 2 * Copyright (c) 1999, 2001, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25/* 26 * COMPONENT_NAME: idl.parser 27 * 28 * ORIGINS: 27 29 * 30 * Licensed Materials - Property of IBM 31 * 5639-D57 (C) COPYRIGHT International Business Machines Corp. 1997, 1999 32 * RMI-IIOP v1.0 33 * 34 */ 35 36package com.sun.tools.corba.se.idl; 37 38// NOTES: 39// -F46082.51<daz> Remove -stateful feature. 40// -D56351<daz> Update computation of RepositoryIDs to CORBA 2.3 (see spec.). 41// -D59166<daz> Add escaped-id. info. to identifiers. 42// -F60858.1<daz> Add support for -corba option, levels 2.2 and 2.3: accept 2.3 43// keywords as ids.; accept ids. that match keywords in letter, but not in case. 44// -D62023<daz> Add support for -corba option, level 2.4: see keyword checking. 
45 46import java.io.EOFException; 47import java.io.File; 48import java.io.FileReader; 49import java.io.IOException; 50import java.io.InputStream; 51import java.io.PushbackInputStream; 52 53import java.util.Enumeration; 54import java.util.Stack; 55import java.util.StringTokenizer; 56import java.util.Vector; 57 58/** 59 * 60 **/ 61class Scanner 62{ 63 // <f46082.51> -Remove stateful feature. 64 //Scanner (IncludeEntry file, String[] keywords, boolean vbose, boolean scanStateful, boolean emitAllIncludes) throws IOException 65 // <f60858.1> 66 //Scanner (IncludeEntry file, String[] keywords, boolean vbose, boolean emitAllIncludes) throws IOException 67 /** 68 * 69 **/ 70 Scanner (IncludeEntry file, String[] keywords, boolean vbose, 71 boolean emitAllIncludes, float cLevel, boolean debug) throws IOException 72 { 73 readFile (file); 74 verbose = vbose; 75 // <f46082.51> 76 //stateful = scanStateful; 77 emitAll = emitAllIncludes; 78 sortKeywords (keywords); 79 corbaLevel = cLevel; 80 this.debug = debug ; 81 } // ctor 82 83 /** 84 * 85 **/ 86 void sortKeywords (String[] keywords) 87 { 88 for (int i = 0; i < keywords.length; ++i) 89 if (wildcardAtEitherEnd (keywords[i])) 90 this.openEndedKeywords.addElement (keywords[i]); 91 else if (wildcardsInside (keywords[i])) 92 this.wildcardKeywords.addElement (keywords[i]); 93 else 94 this.keywords.addElement (keywords[i]); 95 } // sortKeywords 96 97 /** 98 * 99 **/ 100 private boolean wildcardAtEitherEnd (String string) 101 { 102 return string.startsWith ("*") || 103 string.startsWith ("+") || 104 string.startsWith (".") || 105 string.endsWith ("*") || 106 string.endsWith ("+") || 107 string.endsWith ("."); 108 } // wildcardAtEitherEnd 109 110 /** 111 * 112 **/ 113 private boolean wildcardsInside (String string) 114 { 115 return string.indexOf ("*") > 0 || 116 string.indexOf ("+") > 0 || 117 string.indexOf (".") > 0; 118 } // wildcardsInside 119 120 /** 121 * 122 **/ 123 void readFile (IncludeEntry file) throws IOException 124 { 125 
String filename = file.name (); 126 filename = filename.substring (1, filename.length () - 1); 127 readFile (file, filename); 128 } // readFile 129 130 /** 131 * 132 **/ 133 void readFile (IncludeEntry file, String filename) throws IOException 134 { 135 data.fileEntry = file; 136 data.filename = filename; 137 // <f49747.1> 138 //FileInputStream stream = new FileInputStream (data.filename); 139 //data.fileBytes = new byte [stream.available ()]; 140 //stream.read (data.fileBytes); 141 //stream.close (); <ajb> 142 File idlFile = new File (data.filename); 143 int len = (int)idlFile.length (); 144 FileReader fileReader = new FileReader (idlFile); 145 // <d41679> data.fileBytes = new char [len]; 146 final String EOL = System.getProperty ("line.separator"); 147 data.fileBytes = new char [len + EOL.length ()]; 148 149 fileReader.read (data.fileBytes, 0, len); 150 fileReader.close (); 151 152 // <d41679> 153 for (int i = 0; i < EOL.length (); i++) 154 data.fileBytes[len + i] = EOL.charAt (i); 155 156 readChar (); 157 } // readFile 158 159 /** 160 * 161 **/ 162 Token getToken () throws IOException 163 { 164 //System.out.println ("Scanner.getToken char = |" + data.ch + "| (ASCII " + (int)data.ch + ")."); 165 166 // The token could be null if the next set of characters 167 // is not a token: white space, comments, ignored preprocessor 168 // commands. 169 Token token = null; 170 String commentText = new String (""); 171 172 while (token == null) 173 try 174 { 175 data.oldIndex = data.fileIndex; 176 data.oldLine = data.line; 177 if (data.ch <= ' ') { 178 skipWhiteSpace (); 179 continue; 180 } 181 182 // Special case for wchar and wstring literals. 183 // The values are preceded by an L. 184 // 185 // Bug fix 4382578: Can't compile a wchar literal. 186 // 187 // REVISIT. This maps wchar/wstring literals to 188 // our char/string literal types. 
Eventually, we 189 // need to write stronger checking to be spec 190 // compliant in negative cases such as leaving the 191 // L off of a wide string or putting it on a string. 192 if (data.ch == 'L') { 193 // Check to see if the next character is an 194 // apostrophe. 195 readChar(); 196 // Note: This doesn't allow for space between 197 // the L and the apostrophe or quote. 198 if (data.ch == '\'') { 199 // It was a wchar literal. Get the value 200 // and return the token. 201 token = getCharacterToken(true); 202 readChar(); 203 continue; 204 } else 205 if (data.ch == '"') { 206 // It was a wstring literal. 207 // 208 // getUntil assumes we've already passed the 209 // first quote. 210 readChar (); 211 token = new Token (Token.StringLiteral, getUntil ('"'), true); 212 readChar (); 213 continue; 214 } else { 215 // It must not have been a wchar literal. 216 // Push the input back into the buffer, and 217 // fall to the next if case. 218 unread(data.ch); 219 unread('L'); 220 readChar(); 221 } 222 } 223 224 if ((data.ch >= 'a' && data.ch <= 'z') || 225 (data.ch >= 'A' && data.ch <= 'Z') || 226 // <f46082.40> Escaped identifier; see data member comments. 
227 //(data.ch == '_' && underscoreOK) || <daz> 228 (data.ch == '_') || 229 Character.isLetter (data.ch)) { 230 token = getString (); 231 } else 232 if ((data.ch >= '0' && data.ch <= '9') || data.ch == '.') { 233 token = getNumber (); 234 } else { 235 switch (data.ch) 236 { 237 case ';': 238 token = new Token (Token.Semicolon); 239 break; 240 case '{': 241 token = new Token (Token.LeftBrace); 242 break; 243 case '}': 244 token = new Token (Token.RightBrace); 245 break; 246 case ':': 247 readChar (); 248 if (data.ch == ':') 249 token = new Token (Token.DoubleColon); 250 else 251 { 252 unread (data.ch); 253 token = new Token (Token.Colon); 254 } 255 break; 256 case ',': 257 token = new Token (Token.Comma); 258 break; 259 case '=': 260 readChar (); 261 if (data.ch == '=') 262 token = new Token (Token.DoubleEqual); 263 else 264 { 265 unread (data.ch); 266 token = new Token (Token.Equal); 267 } 268 break; 269 case '+': 270 token = new Token (Token.Plus); 271 break; 272 case '-': 273 token = new Token (Token.Minus); 274 break; 275 case '(': 276 token = new Token (Token.LeftParen); 277 break; 278 case ')': 279 token = new Token (Token.RightParen); 280 break; 281 case '<': 282 readChar (); 283 if (data.ch == '<') 284 token = new Token (Token.ShiftLeft); 285 else if (data.ch == '=') 286 token = new Token (Token.LessEqual); 287 else 288 { 289 unread (data.ch); 290 token = new Token (Token.LessThan); 291 } 292 break; 293 case '>': 294 readChar (); 295 if (data.ch == '>') 296 token = new Token (Token.ShiftRight); 297 else if (data.ch == '=') 298 token = new Token (Token.GreaterEqual); 299 else 300 { 301 unread (data.ch); 302 token = new Token (Token.GreaterThan); 303 } 304 break; 305 case '[': 306 token = new Token (Token.LeftBracket); 307 break; 308 case ']': 309 token = new Token (Token.RightBracket); 310 break; 311 case '\'': 312 token = getCharacterToken(false); 313 break; 314 case '"': 315 readChar (); 316 token = new Token (Token.StringLiteral, getUntil ('"', false, 
false, false)); 317 break; 318 case '\\': 319 readChar (); 320 // If this is at the end of a line, then it is the 321 // line continuation character - treat it as white space 322 if (data.ch == '\n' || data.ch == '\r') 323 token = null; 324 else 325 token = new Token (Token.Backslash); 326 break; 327 case '|': 328 readChar (); 329 if (data.ch == '|') 330 token = new Token (Token.DoubleBar); 331 else 332 { 333 unread (data.ch); 334 token = new Token (Token.Bar); 335 } 336 break; 337 case '^': 338 token = new Token (Token.Carat); 339 break; 340 case '&': 341 readChar (); 342 if (data.ch == '&') 343 token = new Token (Token.DoubleAmpersand); 344 else 345 { 346 unread (data.ch); 347 token = new Token (Token.Ampersand); 348 } 349 break; 350 case '*': 351 token = new Token (Token.Star); 352 break; 353 case '/': 354 readChar (); 355 // <21jul1997daz> Extract comments rather than skipping them. 356 // Preserve only the comment immediately preceding the next token. 357 if (data.ch == '/') 358 //skipLineComment (); 359 commentText = getLineComment(); 360 else if (data.ch == '*') 361 //skipBlockComment (); 362 commentText = getBlockComment(); 363 else 364 { 365 unread (data.ch); 366 token = new Token (Token.Slash); 367 } 368 break; 369 case '%': 370 token = new Token (Token.Percent); 371 break; 372 case '~': 373 token = new Token (Token.Tilde); 374 break; 375 376 // The period token is recognized in getNumber. 377 // The period is only valid in a floating ponit number. 
378 //case '.': 379 // token = new Token (Token.Period); 380 // break; 381 382 case '#': 383 token = getDirective (); 384 break; 385 case '!': 386 readChar (); 387 if (data.ch == '=') 388 token = new Token (Token.NotEqual); 389 else 390 { 391 unread (data.ch); 392 token = new Token (Token.Exclamation); 393 } 394 break; 395 case '?': 396 try 397 { 398 token = replaceTrigraph (); 399 break; 400 } 401 catch (InvalidCharacter e) {} 402 default: 403 throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); 404 } 405 readChar (); 406 } 407 } 408 catch (EOFException e) 409 { 410 token = new Token (Token.EOF); 411 } 412 413 // Transfer comment to parser via token. <daz>21jul1997 414 token.comment = new Comment( commentText ); 415 416 //System.out.println ("Scanner.getToken returning token.type = " + token.type); 417 //if (token.type == Token.Identifier || token.type == Token.MacroIdentifier || (token.type >= Token.BooleanLiteral && token.type <= Token.StringLiteral)) 418 // System.out.println ("Scanner.getToken returns token.name = " + token.name); 419 420 if (debug) 421 System.out.println( "Token: " + token ) ; 422 423 return token; 424 } // getToken 425 426 /** 427 * 428 **/ 429 void scanString (String string) 430 { 431 dataStack.push (data); 432 433 data = new ScannerData (data); 434 435 data.fileIndex = 0; 436 data.oldIndex = 0; 437 // <f49747.1> data.fileBytes = string.getBytes (); <ajb> 438 int strLen = string.length(); 439 data.fileBytes = new char[strLen]; 440 string.getChars (0, strLen, data.fileBytes, 0); 441 442 data.macrodata = true; 443 444 try {readChar ();} catch (IOException e) {} 445 } // scanString 446 447 /** 448 * 449 **/ 450 void scanIncludedFile (IncludeEntry file, String filename, boolean includeIsImport) throws IOException 451 { 452 dataStack.push (data); 453 data = new ScannerData (); 454 data.indent = ((ScannerData)dataStack.peek ()).indent + ' '; 455 data.includeIsImport = includeIsImport; 
456 try 457 { 458 readFile (file, filename); 459 if (!emitAll && includeIsImport) 460 SymtabEntry.enteringInclude (); 461 // <d56351> As of CORBA 2.3, include files define new scope for Repository 462 // ID prefixes. The previous Rep. ID is just below the top of the stack and 463 // must be restored when the contents of this include file are parsed (see readCh()). 464 Parser.enteringInclude (); 465 466 if (verbose) 467 System.out.println (data.indent + Util.getMessage ("Compile.parsing", filename)); 468 } 469 catch (IOException e) 470 { 471 data = (ScannerData)dataStack.pop (); 472 throw e; 473 } 474 } // scanIncludedFile 475 476 /** 477 * 478 **/ 479 private void unread (char ch) 480 { 481 if (ch == '\n' && !data.macrodata) --data.line; 482 --data.fileIndex; 483 } // unread 484 485 /** 486 * 487 **/ 488 void readChar () throws IOException 489 { 490 if (data.fileIndex >= data.fileBytes.length) 491 if (dataStack.empty ()) 492 throw new EOFException (); 493 else 494 { 495 // <d56351> Indicate end-of-scope for include file to parser. 496 //Parser.exitingInclude (); 497 498 // IBM.11666 - begin 499 //if (!emitAll && data.includeIsImport && !data.macrodata) 500 //{ 501 //SymtabEntry.exitingInclude (); 502 //Parser.exitingInclude (); // <d59469> 503 //} 504 if (!data.macrodata) 505 { 506 if (!emitAll && data.includeIsImport) 507 SymtabEntry.exitingInclude(); 508 Parser.exitingInclude(); 509 } // IBM.11666 - end 510 511 if (verbose && !data.macrodata) 512 System.out.println (data.indent + Util.getMessage ("Compile.parseDone", data.filename)); 513 data = (ScannerData)dataStack.pop (); 514 } 515 else 516 { 517 data.ch = (char)(data.fileBytes[data.fileIndex++] & 0x00ff); 518 if (data.ch == '\n' && !data.macrodata) ++data.line; 519 } 520 } // readChar 521 522 /** 523 * Starting at a quote, reads a string with possible 524 * unicode or octal values until an end quote. Doesn't 525 * handle line feeds or comments. 
526 */ 527 private String getWString() throws IOException 528 { 529 readChar(); 530 StringBuffer result = new StringBuffer(); 531 532 while (data.ch != '"') { 533 if (data.ch == '\\') { 534 // Could be a \ooo octal or 535 // unicode hex 536 readChar(); 537 if (data.ch == 'u') { 538 // Unicode hex 539 int num = getNDigitHexNumber(4); 540 System.out.println("Got num: " + num); 541 System.out.println("Which is: " + (int)(char)num); 542 result.append((char)num); 543 // result.append((char)getNDigitHexNumber(4)); 544 // getNDigitHexNumber reads the next 545 // character, so loop without reading another 546 continue; 547 } else 548 if (data.ch >= '0' && data.ch <= '7') { 549 // Octal 550 result.append((char)get3DigitOctalNumber()); 551 // get3DigitOctalNumber reads the next 552 // character, so loop without reading another 553 continue; 554 } else { 555 // Wasn't either, so just append the 556 // slash and current character. 557 result.append('\\'); 558 result.append(data.ch); 559 } 560 } else { 561 // Just append the character 562 result.append(data.ch); 563 } 564 565 // Advance to the next character 566 readChar(); 567 } 568 569 return result.toString(); 570 } 571 572 /** 573 * 574 **/ 575 private Token getCharacterToken(boolean isWide) throws IOException 576 { 577 // The token name returned contains a string with two elements: 578 // first the character appears, then the representation of the 579 // character. These are typically the same, but they CAN be 580 // different, for example "O\117" 581 Token token = null; 582 readChar (); 583 if ( data.ch == '\\' ) 584 { 585 readChar (); 586 if ((data.ch == 'x') || (data.ch == 'u')) 587 { 588 char charType = data.ch; 589 int hexNum = getNDigitHexNumber ((charType == 'x') ? 
2 : 4); 590 return new Token (Token.CharacterLiteral, 591 ((char)hexNum) + "\\" + charType + Integer.toString (hexNum, 16), isWide ); 592 } 593 if ((data.ch >= '0') && (data.ch <= '7')) 594 { 595 int octNum = get3DigitOctalNumber (); 596 return new Token (Token.CharacterLiteral, 597 ((char)octNum) + "\\" + Integer.toString (octNum, 8), isWide ); 598 } 599 return singleCharEscapeSequence (isWide); 600 } 601 token = new Token (Token.CharacterLiteral, "" + data.ch + data.ch, isWide ); 602 readChar (); 603 return token; 604 } // getCharacterToken 605 606 /** 607 * 608 **/ 609 private Token singleCharEscapeSequence (boolean isWide) throws IOException 610 { 611 Token token; 612 if (data.ch == 'n') 613 // newline 614 token = new Token (Token.CharacterLiteral, "\n\\n", isWide); 615 else if (data.ch == 't') 616 // horizontal tab 617 token = new Token (Token.CharacterLiteral, "\t\\t", isWide); 618 else if (data.ch == 'v') 619 // vertical tab 620 token = new Token (Token.CharacterLiteral, "\013\\v", isWide); 621 else if (data.ch == 'b') 622 // backspace 623 token = new Token (Token.CharacterLiteral, "\b\\b", isWide); 624 else if (data.ch == 'r') 625 // carriage return 626 token = new Token (Token.CharacterLiteral, "\r\\r", isWide); 627 else if (data.ch == 'f') 628 // form feed 629 token = new Token (Token.CharacterLiteral, "\f\\f", isWide); 630 else if (data.ch == 'a') 631 // alert 632 token = new Token (Token.CharacterLiteral, "\007\\a", isWide); 633 else if (data.ch == '\\') 634 // backslash 635 token = new Token (Token.CharacterLiteral, "\\\\\\", isWide); 636 else if (data.ch == '?') 637 // question mark 638 token = new Token (Token.CharacterLiteral, "?\\?", isWide); 639 else if (data.ch == '\'') 640 // single quote 641 token = new Token (Token.CharacterLiteral, "'\\'", isWide); 642 else if (data.ch == '"') 643 // double quote 644 token = new Token (Token.CharacterLiteral, "\"\\\"", isWide); 645 else 646 throw new InvalidCharacter (data.filename, currentLine (), 
currentLineNumber (), currentLinePosition (), data.ch); 647 readChar (); 648 return token; 649 } // singleCharEscapeSequence 650 651 private Token getString () throws IOException 652 { 653 StringBuffer sbuf = new StringBuffer() ; 654 boolean escaped = false; // <d59166> 655 boolean[] collidesWithKeyword = { false } ; // <d62023> 656 657 // <f46082.40> An escaped id. begins with '_', which is followed by a normal 658 // identifier. Disallow prefixes of '_' having length > 1. 659 if (data.ch == '_') { 660 sbuf.append( data.ch ) ; 661 readChar (); 662 if (escaped = escapedOK) 663 if (data.ch == '_') 664 throw new InvalidCharacter (data.filename, currentLine (), 665 currentLineNumber (), currentLinePosition (), data.ch); 666 } 667 668 // Build up the string of valid characters until a non-string 669 // character is encountered. 670 while (Character.isLetterOrDigit( data.ch ) || (data.ch == '_')) { 671 sbuf.append( data.ch ) ; 672 readChar() ; 673 } 674 675 String string = sbuf.toString() ; 676 677 // <f46082.40> Escaped identifiers - If identifier has '_' prefix, ignore 678 // keyword check and strip '_'; otherwise, perform keyword check. 679 680 if (!escaped) { // Escaped id ==> ignore keyword check 681 Token result = Token.makeKeywordToken( string, corbaLevel, escapedOK, 682 collidesWithKeyword ) ; 683 if (result != null) 684 return result ; 685 } 686 687 // At this point the string is an identifier. If it is a 688 // string which is also a Java keyword, prepend an underscore 689 // so that it doesn't generate a compiler error. 
690 string = getIdentifier (string); 691 692 // If a left paren immediately follows, this could be a 693 // macro definition, return a MacroIdentifier 694 if (data.ch == '(') { 695 readChar (); 696 return new Token (Token.MacroIdentifier, string, escaped, 697 collidesWithKeyword[0], false); 698 } else 699 return new Token (Token.Identifier, string, escaped, 700 collidesWithKeyword[0], false); 701 } 702 703 // Wildcard values 704 static final int Star = 0, Plus = 1, Dot = 2, None = 3; 705 706 /** 707 * 708 **/ 709 private boolean matchesClosedWildKeyword (String string) 710 { 711 boolean found = true; 712 String tmpString = string; 713 Enumeration e = wildcardKeywords.elements (); 714 while (e.hasMoreElements ()) 715 { 716 int wildcard = None; 717 StringTokenizer tokens = new StringTokenizer ((String)e.nextElement (), "*+.", true); 718 if (tokens.hasMoreTokens ()) 719 { 720 String token = tokens.nextToken (); 721 if (tmpString.startsWith (token)) 722 { 723 tmpString = tmpString.substring (token.length ()); 724 while (tokens.hasMoreTokens () && found) 725 { 726 token = tokens.nextToken (); 727 if (token.equals ("*")) 728 wildcard = Star; 729 else if (token.equals ("+")) 730 wildcard = Plus; 731 else if (token.equals (".")) 732 wildcard = Dot; 733 else if (wildcard == Star) 734 { 735 int index = tmpString.indexOf (token); 736 if (index >= 0) 737 tmpString = tmpString.substring (index + token.length ()); 738 else 739 found = false; 740 } 741 else if (wildcard == Plus) 742 { 743 int index = tmpString.indexOf (token); 744 if (index > 0) 745 tmpString = tmpString.substring (index + token.length ()); 746 else 747 found = false; 748 } 749 else if (wildcard == Dot) 750 { 751 int index = tmpString.indexOf (token); 752 if (index == 1) 753 tmpString = tmpString.substring (1 + token.length ()); 754 else 755 found = false; 756 } 757 } 758 if (found && tmpString.equals ("")) 759 break; 760 } 761 } 762 } 763 return found && tmpString.equals (""); 764 } // matchesClosedWildKeyword 
765 766 /** 767 * 768 **/ 769 private String matchesOpenWildcard (String string) 770 { 771 Enumeration e = openEndedKeywords.elements (); 772 String prepend = ""; 773 while (e.hasMoreElements ()) 774 { 775 int wildcard = None; 776 boolean found = true; 777 String tmpString = string; 778 StringTokenizer tokens = new StringTokenizer ((String)e.nextElement (), "*+.", true); 779 while (tokens.hasMoreTokens () && found) 780 { 781 String token = tokens.nextToken (); 782 if (token.equals ("*")) 783 wildcard = Star; 784 else if (token.equals ("+")) 785 wildcard = Plus; 786 else if (token.equals (".")) 787 wildcard = Dot; 788 else if (wildcard == Star) 789 { 790 wildcard = None; 791 int index = tmpString.lastIndexOf (token); 792 if (index >= 0) 793 tmpString = blankOutMatch (tmpString, index, token.length ()); 794 else 795 found = false; 796 } 797 else if (wildcard == Plus) 798 { 799 wildcard = None; 800 int index = tmpString.lastIndexOf (token); 801 if (index > 0) 802 tmpString = blankOutMatch (tmpString, index, token.length ()); 803 else 804 found = false; 805 } 806 else if (wildcard == Dot) 807 { 808 wildcard = None; 809 int index = tmpString.lastIndexOf (token); 810 if (index == 1) 811 tmpString = blankOutMatch (tmpString, 1, token.length ()); 812 else 813 found = false; 814 } 815 else if (wildcard == None) 816 if (tmpString.startsWith (token)) 817 tmpString = blankOutMatch (tmpString, 0, token.length ()); 818 else 819 found = false; 820 } 821 822 // Make sure that, if the last character of the keyword is a 823 // wildcard, that the string matches what the wildcard 824 // requires. 
825 if (found) 826 { 827 if (wildcard == Star) 828 ; 829 else if (wildcard == Plus && tmpString.lastIndexOf (' ') != tmpString.length () - 1) 830 ; 831 else if (wildcard == Dot && tmpString.lastIndexOf (' ') == tmpString.length () - 2) 832 ; 833 else if (wildcard == None && tmpString.lastIndexOf (' ') == tmpString.length () - 1) 834 ; 835 else 836 found = false; 837 } 838 // If found, then prepend an underscore. But also try matching 839 // again after leading and trailing blanks are removed from 840 // tmpString. This isn't quite right, but it solves a problem 841 // which surfaced in the Java mapping. For example: 842 // openEndedKeywords = {"+Helper", "+Holder", "+Package"}; 843 // string = fooHelperPackage. 844 // Given the mechanics of the Java mapping, _fooHelperPackage 845 // COULD have a conflict, so for each occurance of a keyword, 846 // an underscore is added, so this would cause two underscores: 847 // __fooHelperPackage. To accomplish this, the first time thru 848 // tmpString is "fooHelper " at this point, strip off the 849 // trailing blanks and try matching "fooHelper". This also 850 // matches, so two underscores are prepended. 
851 if (found) 852 { 853 prepend = prepend + "_" + matchesOpenWildcard (tmpString.trim ()); 854 break; 855 } 856 } 857 return prepend; 858 } // matchesOpenWildcard 859 860 /** 861 * 862 **/ 863 private String blankOutMatch (String string, int start, int length) 864 { 865 char[] blanks = new char [length]; 866 for (int i = 0; i < length; ++i) 867 blanks[i] = ' '; 868 return string.substring (0, start) + new String (blanks) + string.substring (start + length); 869 } // blankOutMatch 870 871 /** 872 * 873 **/ 874 private String getIdentifier (String string) 875 { 876 if (keywords.contains (string)) 877 // string matches a non-wildcard keyword 878 string = '_' + string; 879 else 880 { 881 // Check to see if string matches any wildcard keywords that 882 // aren't open ended (don't have a wildcard as the first or 883 // last character. 884 String prepend = ""; 885 if (matchesClosedWildKeyword (string)) 886 prepend = "_"; 887 else 888 // string did not match any closed wildcard keywords (that 889 // is, keywords with wildcards anywhere but at the beginning 890 // or end of the word). 891 // Now check for * + or . at the beginning or end. 892 // These require special handling because they could match 893 // more than one keyword. prepend an underscore for each 894 // matched keyword. 895 prepend = matchesOpenWildcard (string); 896 string = prepend + string; 897 } 898 return string; 899 } // getIdentifier 900 901 /** 902 * 903 **/ 904 private Token getDirective () throws IOException 905 { 906 readChar (); 907 String string = new String (); 908 while ((data.ch >= 'a' && data.ch <= 'z') || (data.ch >= 'A' && data.ch <= 'Z')) 909 { 910 string = string + data.ch; 911 readChar (); 912 } 913 unread (data.ch); 914 for (int i = 0; i < Token.Directives.length; ++i) 915 if (string.equals (Token.Directives[i])) 916 return new Token (Token.FirstDirective + i); 917 // If it got this far, it is an unknown preprocessor directive. 
918 return new Token (Token.Unknown, string); 919 } // getDirective 920 921 /** 922 * 923 **/ 924 private Token getNumber () throws IOException 925 { 926 if (data.ch == '.') 927 return getFractionNoInteger (); 928 else if (data.ch == '0') 929 return isItHex (); 930 else // the only other possibliities are 1..9 931 return getInteger (); 932 } // getNumber 933 934 /** 935 * 936 **/ 937 private Token getFractionNoInteger () throws IOException 938 { 939 readChar (); 940 if (data.ch >= '0' && data.ch <= '9') 941 return getFraction ("."); 942 else 943 return new Token (Token.Period); 944 } // getFractionNoInteger 945 946 /** 947 * 948 **/ 949 private Token getFraction (String string) throws IOException 950 { 951 while (data.ch >= '0' && data.ch <= '9') 952 { 953 string = string + data.ch; 954 readChar (); 955 } 956 if (data.ch == 'e' || data.ch == 'E') 957 return getExponent (string + 'E'); 958 else 959 return new Token (Token.FloatingPointLiteral, string); 960 } // getFraction 961 962 /** 963 * 964 **/ 965 private Token getExponent (String string) throws IOException 966 { 967 readChar (); 968 if (data.ch == '+' || data.ch == '-') 969 { 970 string = string + data.ch; 971 readChar (); 972 } 973 else if (data.ch < '0' || data.ch > '9') 974 throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); 975 while (data.ch >= '0' && data.ch <= '9') 976 { 977 string = string + data.ch; 978 readChar (); 979 } 980 return new Token (Token.FloatingPointLiteral, string); 981 } // getExponent 982 983 /** 984 * 985 **/ 986 private Token isItHex () throws IOException 987 { 988 readChar (); 989 if (data.ch == '.') 990 { 991 readChar (); 992 return getFraction ("0."); 993 } 994 else if (data.ch == 'x' || data.ch == 'X') 995 return getHexNumber ("0x"); 996 else if (data.ch == '8' || data.ch == '9') 997 throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); 998 else if (data.ch 
>= '0' && data.ch <= '7') 999 return getOctalNumber (); 1000 else if (data.ch == 'e' || data.ch == 'E') 1001 return getExponent ("0E"); 1002 else 1003 return new Token (Token.IntegerLiteral, "0"); 1004 } // isItHex 1005 1006 /** 1007 * 1008 **/ 1009 private Token getOctalNumber () throws IOException 1010 { 1011 String string = "0" + data.ch; 1012 readChar (); 1013 while ((data.ch >= '0' && data.ch <= '9')) 1014 { 1015 if (data.ch == '8' || data.ch == '9') 1016 throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); 1017 string = string + data.ch; 1018 readChar (); 1019 } 1020 return new Token (Token.IntegerLiteral, string); 1021 } // getOctalNumber 1022 1023 /** 1024 * 1025 **/ 1026 private Token getHexNumber (String string) throws IOException 1027 { 1028 readChar (); 1029 if ((data.ch < '0' || data.ch > '9') && (data.ch < 'a' || data.ch > 'f') && (data.ch < 'A' || data.ch > 'F')) 1030 throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); 1031 else 1032 while ((data.ch >= '0' && data.ch <= '9') || (data.ch >= 'a' && data.ch <= 'f') || (data.ch >= 'A' && data.ch <= 'F')) 1033 { 1034 string = string + data.ch; 1035 readChar (); 1036 } 1037 return new Token (Token.IntegerLiteral, string); 1038 } // getHexNumber 1039 1040 /** 1041 * 1042 **/ 1043 private int getNDigitHexNumber (int n) throws IOException 1044 { 1045 readChar (); 1046 if (!isHexChar (data.ch)) 1047 throw new InvalidCharacter (data.filename, currentLine (), 1048 currentLineNumber (), currentLinePosition (), data.ch); 1049 String string = "" + data.ch; 1050 readChar (); 1051 for (int i = 2; i <= n; i++) 1052 { 1053 if (!isHexChar( data.ch)) 1054 break; 1055 string += data.ch; 1056 readChar (); 1057 } 1058 try 1059 { 1060 return Integer.parseInt (string, 16); 1061 } 1062 catch (NumberFormatException e) 1063 { 1064 } 1065 return 0; 1066 } // getNDigitHexNumber 1067 1068 /** 1069 * 1070 **/ 
1071 private boolean isHexChar ( char hex ) 1072 { 1073 return ((data.ch >= '0') && (data.ch <= '9')) || 1074 ((data.ch >= 'a') && (data.ch <= 'f')) || 1075 ((data.ch >= 'A') && (data.ch <= 'F')); 1076 } 1077 1078 /** 1079 * 1080 **/ 1081 private int get3DigitOctalNumber () throws IOException 1082 { 1083 char firstDigit = data.ch; 1084 String string = "" + data.ch; 1085 readChar (); 1086 if (data.ch >= '0' && data.ch <= '7') 1087 { 1088 string = string + data.ch; 1089 readChar (); 1090 if (data.ch >= '0' && data.ch <= '7') 1091 { 1092 string = string + data.ch; 1093 if (firstDigit > '3') 1094 // This is a 3-digit number bigger than 377 1095 throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), firstDigit); 1096 readChar (); 1097 } 1098 } 1099 int ret = 0; 1100 try 1101 { 1102 ret = Integer.parseInt (string, 8); 1103 } 1104 catch (NumberFormatException e) 1105 { 1106 throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), string.charAt (0)); 1107 } 1108 return ret; 1109 } // get3DigitOctalNumber 1110 1111 /** 1112 * 1113 **/ 1114 private Token getInteger () throws IOException 1115 { 1116 String string = "" + data.ch; 1117 readChar (); 1118 if (data.ch == '.') 1119 { 1120 readChar (); 1121 return getFraction (string + '.'); 1122 } 1123 else if (data.ch == 'e' || data.ch == 'E') 1124 return getExponent (string + 'E'); 1125 else if (data.ch >= '0' && data.ch <= '9') 1126 while (data.ch >= '0' && data.ch <= '9') 1127 { 1128 string = string + data.ch; 1129 readChar (); 1130 if (data.ch == '.') 1131 { 1132 readChar (); 1133 return getFraction (string + '.'); 1134 } 1135 } 1136 return new Token (Token.IntegerLiteral, string); 1137 } // getInteger 1138 1139 /** 1140 * 1141 **/ 1142 private Token replaceTrigraph () throws IOException 1143 { 1144 readChar (); 1145 if (data.ch == '?') 1146 { 1147 readChar (); 1148 if (data.ch == '=') 1149 data.ch = '#'; 1150 else if (data.ch == 
'/') 1151 data.ch = '\\'; 1152 else if (data.ch == '\'') 1153 data.ch = '^'; 1154 else if (data.ch == '(') 1155 data.ch = '['; 1156 else if (data.ch == ')') 1157 data.ch = ']'; 1158 else if (data.ch == '!') 1159 data.ch = '|'; 1160 else if (data.ch == '<') 1161 data.ch = '{'; 1162 else if (data.ch == '>') 1163 data.ch = '}'; 1164 else if (data.ch == '-') 1165 data.ch = '~'; 1166 else 1167 { 1168 unread (data.ch); 1169 unread ('?'); 1170 throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); 1171 } 1172 return getToken (); 1173 } 1174 else 1175 { 1176 unread ('?'); 1177 throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); 1178 } 1179 } // replaceTrigraph 1180 1181 /** 1182 * 1183 **/ 1184 void skipWhiteSpace () throws IOException 1185 { 1186 while (data.ch <= ' ') 1187 readChar (); 1188 } // skipWhiteSpace 1189 1190 /** 1191 * 1192 **/ 1193 private void skipBlockComment () throws IOException 1194 { 1195 try 1196 { 1197 boolean done = false; 1198 readChar (); 1199 while (!done) 1200 { 1201 while (data.ch != '*') 1202 readChar (); 1203 readChar (); 1204 if (data.ch == '/') 1205 done = true; 1206 } 1207 } 1208 catch (EOFException e) 1209 { 1210 ParseException.unclosedComment (data.filename); 1211 throw e; 1212 } 1213 } // skipBlockComment 1214 1215 /** 1216 * 1217 **/ 1218 void skipLineComment () throws IOException 1219 { 1220 while (data.ch != '\n') 1221 readChar (); 1222 } // skipLineComment 1223 1224 // The following two routines added to extract comments rather 1225 // than ignore them. 1226 1227 /** 1228 * Extract a line comment from the input buffer. 
1229 **/ 1230 private String getLineComment () throws IOException 1231 { 1232 StringBuffer sb = new StringBuffer( "/" ); 1233 while (data.ch != '\n') 1234 { 1235 if (data.ch != '\r') 1236 sb.append (data.ch); 1237 readChar (); 1238 } 1239 return sb.toString(); 1240 } // getLineComment 1241 1242 /** 1243 * Extract a block comment from the input buffer. 1244 **/ 1245 private String getBlockComment () throws IOException 1246 { 1247 StringBuffer sb = new StringBuffer ("/*"); 1248 try 1249 { 1250 boolean done = false; 1251 readChar (); 1252 sb.append (data.ch); 1253 while (!done) 1254 { 1255 while (data.ch != '*') 1256 { 1257 readChar (); 1258 sb.append (data.ch); 1259 } 1260 readChar (); 1261 sb.append (data.ch); 1262 if (data.ch == '/') 1263 done = true; 1264 } 1265 } 1266 catch (EOFException e) 1267 { 1268 ParseException.unclosedComment (data.filename); 1269 throw e; 1270 } 1271 return sb.toString (); 1272 } // getBlockComment 1273 1274 /** 1275 * 1276 **/ 1277 Token skipUntil (char c) throws IOException 1278 { 1279 while (data.ch != c) 1280 { 1281 if (data.ch == '/') 1282 { 1283 readChar (); 1284 if (data.ch == '/') 1285 { 1286 skipLineComment (); 1287 // If this is skipping until the newline, skipLineComment 1288 // reads past the newline, so it won't be seen by the 1289 // while loop conditional check. 1290 if (c == '\n') break; 1291 } 1292 else if (data.ch == '*') 1293 skipBlockComment (); 1294 } 1295 else 1296 readChar (); 1297 } 1298 return getToken (); 1299 } // skipUntil 1300 1301 // getUntil is used for macro definitions and to get quoted 1302 // strings, so characters within "("...")" and '"'...'"' are 1303 // ignored. 
Ie getUntil ',' on (,,,,),X will return (,,,,) 1304 1305 String getUntil (char c) throws IOException 1306 { 1307 return getUntil (c, true, true, true); 1308 } 1309 1310 String getUntil (char c, boolean allowQuote, boolean allowCharLit, boolean allowComment) throws IOException 1311 { 1312 String string = ""; 1313 while (data.ch != c) 1314 string = appendToString (string, allowQuote, allowCharLit, allowComment); 1315 return string; 1316 } // getUntil 1317 1318 /** 1319 * 1320 **/ 1321 String getUntil (char c1, char c2) throws IOException 1322 { 1323 String string = ""; 1324 while (data.ch != c1 && data.ch != c2) 1325 string = appendToString (string, false, false, false); 1326 return string; 1327 } // getUntil 1328 1329 /** 1330 * 1331 **/ 1332 private String appendToString (String string, boolean allowQuote, boolean allowCharLit, boolean allowComment) throws IOException 1333 { 1334 // Ignore any comments if they are allowed 1335 if (allowComment && data.ch == '/') 1336 { 1337 readChar (); 1338 if (data.ch == '/') 1339 skipLineComment (); 1340 else if (data.ch == '*') 1341 skipBlockComment (); 1342 else 1343 string = string + '/'; 1344 } 1345 // Handle line continuation character 1346 else if (data.ch == '\\') 1347 { 1348 readChar (); 1349 if (data.ch == '\n') 1350 readChar (); 1351 else if (data.ch == '\r') 1352 { 1353 readChar (); 1354 if (data.ch == '\n') 1355 readChar (); 1356 } 1357 else 1358 { 1359 string = string + '\\' + data.ch; 1360 readChar (); 1361 } 1362 } 1363 // characters within "("...")" and '"'...'"' are ignored. 
1364 // Ie getUntil ',' on (,,,,),X will return (,,,) 1365 else 1366 { 1367 if (allowCharLit && data.ch == '"') 1368 { 1369 readChar (); 1370 string = string + '"'; 1371 while (data.ch != '"') 1372 string = appendToString (string, true, false, allowComment); 1373 } 1374 else if (allowQuote && allowCharLit && data.ch == '(') 1375 { 1376 readChar (); 1377 string = string + '('; 1378 while (data.ch != ')') 1379 string = appendToString (string, false, false, allowComment); 1380 } 1381 else if (allowQuote && data.ch == '\'') 1382 { 1383 readChar (); 1384 string = string + "'"; 1385 while (data.ch != '\'') 1386 string = appendToString (string, false, true, allowComment); 1387 } 1388 string = string + data.ch; 1389 readChar (); 1390 } 1391 return string; 1392 } // appendToString 1393 1394 /** 1395 * 1396 **/ 1397 String getStringToEOL () throws IOException 1398 { 1399 String string = new String (); 1400 while (data.ch != '\n') 1401 { 1402 if (data.ch == '\\') 1403 { 1404 readChar (); 1405 if (data.ch == '\n') 1406 readChar (); 1407 else if (data.ch == '\r') 1408 { 1409 readChar (); 1410 if (data.ch == '\n') 1411 readChar (); 1412 } 1413 else 1414 { 1415 string = string + data.ch; 1416 readChar (); 1417 } 1418 } 1419 else 1420 { 1421 string = string + data.ch; 1422 readChar (); 1423 } 1424 } 1425 return string; 1426 } // getStringToEOL 1427 1428 /** 1429 * 1430 **/ 1431 String filename () 1432 { 1433 return data.filename; 1434 } // filename 1435 1436 /** 1437 * 1438 **/ 1439 IncludeEntry fileEntry () 1440 { 1441 return data.fileEntry; 1442 } // fileEntry 1443 1444 /** 1445 * 1446 **/ 1447 int currentLineNumber () 1448 { 1449 return data.line; 1450 } // currentLineNumber 1451 1452 /** 1453 * 1454 **/ 1455 int lastTokenLineNumber () 1456 { 1457 return data.oldLine; 1458 } // lastTokenLineNumber 1459 1460 private int BOL; // Beginning Of Line 1461 1462 /** 1463 * 1464 **/ 1465 String currentLine () 1466 { 1467 BOL = data.fileIndex - 1; 1468 try 1469 { 1470 // If the current 
position is at the end of the line, 1471 // set BOL to before the end of the line so the whole 1472 // line is returned. 1473 if (data.fileBytes[BOL - 1] == '\r' && data.fileBytes[BOL] == '\n') 1474 BOL -= 2; 1475 else if (data.fileBytes[BOL] == '\n') 1476 --BOL; 1477 while (data.fileBytes[BOL] != '\n') 1478 --BOL; 1479 } 1480 catch (ArrayIndexOutOfBoundsException e) 1481 { 1482 BOL = -1; 1483 } 1484 ++BOL; // Go to the first character AFTER the newline 1485 int EOL = data.fileIndex - 1; 1486 try 1487 { 1488 while (data.fileBytes[EOL] != '\n' && data.fileBytes[EOL] != '\r') 1489 ++EOL; 1490 } 1491 catch (ArrayIndexOutOfBoundsException e) 1492 { 1493 EOL = data.fileBytes.length; 1494 } 1495 if (BOL < EOL) 1496 return new String (data.fileBytes, BOL, EOL - BOL); 1497 else 1498 return ""; 1499 } // currentLine 1500 1501 /** 1502 * 1503 **/ 1504 String lastTokenLine () 1505 { 1506 int saveFileIndex = data.fileIndex; 1507 data.fileIndex = data.oldIndex; 1508 String ret = currentLine (); 1509 data.fileIndex = saveFileIndex; 1510 return ret; 1511 } // lastTokenLine 1512 1513 /** 1514 * 1515 **/ 1516 int currentLinePosition () 1517 { 1518 return data.fileIndex - BOL; 1519 } // currentLinePosition 1520 1521 /** 1522 * 1523 **/ 1524 int lastTokenLinePosition () 1525 { 1526 return data.oldIndex - BOL; 1527 } // lastTokenLinePosition 1528 1529 // The scanner data is moved to a separate class so that all of the 1530 // data can easily be pushed and popped to a stack. 1531 1532 // The data must be stackable for macros and #included files. When 1533 // a macro is encountered: the current stack data is reserved on 1534 // the stack; the stack is loaded with the macro info; processing 1535 // proceeds with this data. The same is true for #included files. 1536 1537 // It may seem that the entire Scanner should be put on a stack in 1538 // the Parser since all the scanner data is stackable. But that 1539 // would mean instantiating a new scanner. 
// The scanner must continue from where it left off; when certain things
// cross file boundaries, they must be handled by the scanner, not the
// parser, things like:  block comments, quoted strings, tokens.

// State of the input currently being scanned (current character, file
// name, buffer, indices and line numbers).
private ScannerData data = new ScannerData ();
// Stack for scanner data, per the note above on macros and #included
// files.  NOTE(review): the push/pop code is outside this section.
private Stack dataStack = new Stack ();
// Keywords that contain no wildcard character (filled by sortKeywords).
private Vector keywords = new Vector ();
// Keywords that start or end with '*', '+' or '.' (filled by sortKeywords).
private Vector openEndedKeywords = new Vector ();
// Keywords with '*', '+' or '.' only in the interior (filled by sortKeywords).
private Vector wildcardKeywords = new Vector ();
// Set from the constructor's vbose argument.
private boolean verbose;
// <f46082.40> Identifiers starting with '_' are considered "Escaped",
// except when scanned during preprocessing.  Class Preprocessor is
// responsible to modify the escapedOK flag accordingly.  Since preceding
// underscores are now legal when scanning identifiers as well as
// macro identifier, underscoreOK is obsolete.
//
// boolean underscoreOK = false;
boolean escapedOK = true;
// <f46082.51> Remove -stateful feature.
// boolean stateful;
// Set from the constructor's emitAllIncludes argument.
private boolean emitAll;
// Set from the constructor's cLevel argument (see the -corba option
// notes at the top of the file).
private float corbaLevel;
// Set from the constructor's debug argument.
private boolean debug ;
} // class Scanner

// This is a dumb class, really just a struct.  It contains all of the
// scanner class's data in one place so that that data can be easily
// pushed and popped to a stack.
1567 1568/** 1569 * 1570 **/ 1571class ScannerData 1572{ 1573 /** 1574 * 1575 **/ 1576 public ScannerData () 1577 { 1578 } // ctor 1579 1580 /** 1581 * 1582 **/ 1583 public ScannerData (ScannerData that) 1584 { 1585 indent = that.indent; 1586 fileEntry = that.fileEntry; 1587 filename = that.filename; 1588 fileBytes = that.fileBytes; 1589 fileIndex = that.fileIndex; 1590 oldIndex = that.oldIndex; 1591 ch = that.ch; 1592 line = that.line; 1593 oldLine = that.oldLine; 1594 macrodata = that.macrodata; 1595 includeIsImport = that.includeIsImport; 1596 } // copy ctor 1597 1598 String indent = ""; 1599 IncludeEntry fileEntry = null; 1600 String filename = ""; 1601 1602 // fileBytes is a byte array rather than a char array. This is 1603 // safe because OMG IDL is specified to be ISO Latin-1 whose high- 1604 // order byte is always 0x0. <f49747.1> Converted from byte[] to char[] 1605 // to employ Reader classes, which have Character encoding features. <ajb> 1606 //byte[] fileBytes = null; 1607 char[] fileBytes = null; 1608 int fileIndex = 0; 1609 int oldIndex = 0; 1610 char ch; 1611 int line = 1; 1612 int oldLine = 1; 1613 boolean macrodata = false; 1614 boolean includeIsImport = false; 1615} // class ScannerData 1616