Scanner.java revision 608:7e06bf1dcb09
1/*
2 * Copyright (c) 1999, 2001, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25/*
26 * COMPONENT_NAME: idl.parser
27 *
28 * ORIGINS: 27
29 *
30 * Licensed Materials - Property of IBM
31 * 5639-D57 (C) COPYRIGHT International Business Machines Corp. 1997, 1999
32 * RMI-IIOP v1.0
33 *
34 */
35
36package com.sun.tools.corba.se.idl;
37
38// NOTES:
39// -F46082.51<daz> Remove -stateful feature.
40// -D56351<daz> Update computation of RepositoryIDs to CORBA 2.3 (see spec.).
41// -D59166<daz> Add escaped-id. info. to identifiers.
42// -F60858.1<daz> Add support for -corba option, levels 2.2 and 2.3: accept 2.3
43//   keywords as ids.; accept ids. that match keywords in letter, but not in case.
44// -D62023<daz> Add support for -corba option, level 2.4: see keyword checking.
45
46import java.io.EOFException;
47import java.io.File;
48import java.io.FileReader;
49import java.io.IOException;
50import java.io.InputStream;
51import java.io.PushbackInputStream;
52
53import java.util.Enumeration;
54import java.util.Stack;
55import java.util.StringTokenizer;
56import java.util.Vector;
57
58/**
59 *
60 **/
61class Scanner
62{
63  // <f46082.51> -Remove stateful feature.
64  //Scanner (IncludeEntry file, String[] keywords, boolean vbose, boolean scanStateful, boolean emitAllIncludes) throws IOException
65  // <f60858.1>
66  //Scanner (IncludeEntry file, String[] keywords, boolean vbose, boolean emitAllIncludes) throws IOException
67  /**
68   *
69   **/
70  Scanner (IncludeEntry file, String[] keywords, boolean vbose,
71      boolean emitAllIncludes, float cLevel, boolean debug) throws IOException
72  {
73    readFile (file);
74    verbose  = vbose;
75    // <f46082.51>
76    //stateful = scanStateful;
77    emitAll  = emitAllIncludes;
78    sortKeywords (keywords);
79    corbaLevel = cLevel;
80    this.debug = debug ;
81  } // ctor
82
83  /**
84   *
85   **/
86  void sortKeywords (String[] keywords)
87  {
88    for (int i = 0; i < keywords.length; ++i)
89      if (wildcardAtEitherEnd (keywords[i]))
90        this.openEndedKeywords.addElement (keywords[i]);
91      else if (wildcardsInside (keywords[i]))
92        this.wildcardKeywords.addElement (keywords[i]);
93      else
94        this.keywords.addElement (keywords[i]);
95  } // sortKeywords
96
97  /**
98   *
99   **/
100  private boolean wildcardAtEitherEnd (String string)
101  {
102    return string.startsWith ("*") ||
103           string.startsWith ("+") ||
104           string.startsWith (".") ||
105           string.endsWith ("*") ||
106           string.endsWith ("+") ||
107           string.endsWith (".");
108  } // wildcardAtEitherEnd
109
110  /**
111   *
112   **/
113  private boolean wildcardsInside (String string)
114  {
115    return string.indexOf ("*") > 0 ||
116           string.indexOf ("+") > 0 ||
117           string.indexOf (".") > 0;
118  } // wildcardsInside
119
120  /**
121   *
122   **/
123  void readFile (IncludeEntry file) throws IOException
124  {
125    String filename = file.name ();
126    filename = filename.substring (1, filename.length () - 1);
127    readFile (file, filename);
128  } // readFile
129
130  /**
131   *
132   **/
133  void readFile (IncludeEntry file, String filename) throws IOException
134  {
135    data.fileEntry = file;
136    data.filename = filename;
137    // <f49747.1>
138    //FileInputStream stream = new FileInputStream (data.filename);
139    //data.fileBytes = new byte [stream.available ()];
140    //stream.read (data.fileBytes);
141    //stream.close (); <ajb>
142    File idlFile = new File (data.filename);
143    int len = (int)idlFile.length ();
144    FileReader fileReader = new FileReader (idlFile);
145    // <d41679> data.fileBytes = new char [len];
146    final String EOL = System.getProperty ("line.separator");
147    data.fileBytes = new char [len + EOL.length ()];
148
149    fileReader.read (data.fileBytes, 0, len);
150    fileReader.close ();
151
152    // <d41679>
153    for (int i = 0; i < EOL.length (); i++)
154      data.fileBytes[len + i] = EOL.charAt (i);
155
156    readChar ();
157  } // readFile
158
159  /**
160   *
161   **/
162  Token getToken () throws IOException
163  {
164    //System.out.println ("Scanner.getToken char = |" + data.ch + "| (ASCII " + (int)data.ch + ").");
165
166    // The token could be null if the next set of characters
167    // is not a token:  white space, comments, ignored preprocessor
168    // commands.
169    Token token = null;
170    String commentText = new String ("");
171
172    while (token == null)
173      try
174      {
175        data.oldIndex = data.fileIndex;
176        data.oldLine  = data.line;
177        if (data.ch <= ' ') {
178          skipWhiteSpace ();
179          continue;
180        }
181
182        // Special case for wchar and wstring literals.
183        // The values are preceded by an L.
184        //
185        // Bug fix 4382578:  Can't compile a wchar literal.
186        //
187        // REVISIT.  This maps wchar/wstring literals to
188        // our char/string literal types.  Eventually, we
189        // need to write stronger checking to be spec
190        // compliant in negative cases such as leaving the
191        // L off of a wide string or putting it on a string.
192        if (data.ch == 'L') {
193            // Check to see if the next character is an
194            // apostrophe.
195            readChar();
196            // Note:  This doesn't allow for space between
197            // the L and the apostrophe or quote.
198            if (data.ch == '\'') {
199                // It was a wchar literal.  Get the value
200                // and return the token.
201                token = getCharacterToken(true);
202                readChar();
203                continue;
204            } else
205            if (data.ch == '"') {
206                // It was a wstring literal.
207                //
208                // getUntil assumes we've already passed the
209                // first quote.
210                readChar ();
211                token = new Token (Token.StringLiteral, getUntil ('"'), true);
212                readChar ();
213                continue;
214            } else {
215                // It must not have been a wchar literal.
216                // Push the input back into the buffer, and
217                // fall to the next if case.
218                unread(data.ch);
219                unread('L');
220                readChar();
221            }
222        }
223
224        if ((data.ch >= 'a' && data.ch <= 'z') ||
225            (data.ch >= 'A' && data.ch <= 'Z') ||
226            // <f46082.40> Escaped identifier; see data member comments.
227            //(data.ch == '_' && underscoreOK)   || <daz>
228            (data.ch == '_')   ||
229            Character.isLetter (data.ch)) {
230            token = getString ();
231        } else
232        if ((data.ch >= '0' && data.ch <= '9') || data.ch == '.') {
233            token = getNumber ();
234        } else {
235          switch (data.ch)
236          {
237            case ';':
238              token = new Token (Token.Semicolon);
239              break;
240            case '{':
241              token = new Token (Token.LeftBrace);
242              break;
243            case '}':
244              token = new Token (Token.RightBrace);
245              break;
246            case ':':
247              readChar ();
248              if (data.ch == ':')
249                token = new Token (Token.DoubleColon);
250              else
251              {
252                unread (data.ch);
253                token = new Token (Token.Colon);
254              }
255              break;
256            case ',':
257              token = new Token (Token.Comma);
258              break;
259            case '=':
260              readChar ();
261              if (data.ch == '=')
262                token = new Token (Token.DoubleEqual);
263              else
264              {
265                unread (data.ch);
266                token = new Token (Token.Equal);
267              }
268              break;
269            case '+':
270              token = new Token (Token.Plus);
271              break;
272            case '-':
273              token = new Token (Token.Minus);
274              break;
275            case '(':
276              token = new Token (Token.LeftParen);
277              break;
278            case ')':
279              token = new Token (Token.RightParen);
280              break;
281            case '<':
282              readChar ();
283              if (data.ch == '<')
284                token = new Token (Token.ShiftLeft);
285              else if (data.ch == '=')
286                token = new Token (Token.LessEqual);
287              else
288              {
289                unread (data.ch);
290                token = new Token (Token.LessThan);
291              }
292              break;
293            case '>':
294              readChar ();
295              if (data.ch == '>')
296                token = new Token (Token.ShiftRight);
297              else if (data.ch == '=')
298                token = new Token (Token.GreaterEqual);
299              else
300              {
301                unread (data.ch);
302                token = new Token (Token.GreaterThan);
303              }
304              break;
305            case '[':
306              token = new Token (Token.LeftBracket);
307              break;
308            case ']':
309              token = new Token (Token.RightBracket);
310              break;
311            case '\'':
312              token = getCharacterToken(false);
313              break;
314            case '"':
315              readChar ();
316              token = new Token (Token.StringLiteral, getUntil ('"', false, false, false));
317              break;
318            case '\\':
319              readChar ();
320              // If this is at the end of a line, then it is the
321              // line continuation character - treat it as white space
322              if (data.ch == '\n' || data.ch == '\r')
323                token = null;
324              else
325                token = new Token (Token.Backslash);
326              break;
327            case '|':
328              readChar ();
329              if (data.ch == '|')
330                token = new Token (Token.DoubleBar);
331              else
332              {
333                unread (data.ch);
334                token = new Token (Token.Bar);
335              }
336              break;
337            case '^':
338              token = new Token (Token.Carat);
339              break;
340            case '&':
341              readChar ();
342              if (data.ch == '&')
343                token = new Token (Token.DoubleAmpersand);
344              else
345              {
346                unread (data.ch);
347                token = new Token (Token.Ampersand);
348              }
349              break;
350            case '*':
351              token = new Token (Token.Star);
352              break;
353            case '/':
354              readChar ();
355              // <21jul1997daz>  Extract comments rather than skipping them.
356              // Preserve only the comment immediately preceding the next token.
357              if (data.ch == '/')
358                //skipLineComment ();
359                commentText = getLineComment();
360              else if (data.ch == '*')
361                //skipBlockComment ();
362                commentText = getBlockComment();
363              else
364              {
365                unread (data.ch);
366                token = new Token (Token.Slash);
367              }
368              break;
369            case '%':
370              token = new Token (Token.Percent);
371              break;
372            case '~':
373              token = new Token (Token.Tilde);
374              break;
375
376            // The period token is recognized in getNumber.
377            // The period is only valid in a floating ponit number.
378            //case '.':
379            //  token = new Token (Token.Period);
380            //  break;
381
382            case '#':
383              token = getDirective ();
384              break;
385            case '!':
386              readChar ();
387              if (data.ch == '=')
388                token = new Token (Token.NotEqual);
389              else
390              {
391                unread (data.ch);
392                token = new Token (Token.Exclamation);
393              }
394              break;
395            case '?':
396              try
397              {
398                token = replaceTrigraph ();
399                break;
400              }
401              catch (InvalidCharacter e) {}
402            default:
403              throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);
404          }
405          readChar ();
406        }
407      }
408      catch (EOFException e)
409      {
410        token = new Token (Token.EOF);
411      }
412
413    // Transfer comment to parser via token.  <daz>21jul1997
414    token.comment = new Comment( commentText );
415
416    //System.out.println ("Scanner.getToken returning token.type = " + token.type);
417    //if (token.type == Token.Identifier || token.type == Token.MacroIdentifier || (token.type >= Token.BooleanLiteral && token.type <= Token.StringLiteral))
418    //  System.out.println ("Scanner.getToken returns token.name = " + token.name);
419
420    if (debug)
421        System.out.println( "Token: " + token ) ;
422
423    return token;
424  } // getToken
425
426  /**
427   *
428   **/
429  void scanString (String string)
430  {
431    dataStack.push (data);
432
433    data = new ScannerData (data);
434
435    data.fileIndex = 0;
436    data.oldIndex  = 0;
437    // <f49747.1> data.fileBytes = string.getBytes (); <ajb>
438    int strLen = string.length();
439    data.fileBytes = new char[strLen];
440    string.getChars (0, strLen, data.fileBytes, 0);
441
442    data.macrodata = true;
443
444    try {readChar ();} catch (IOException e) {}
445  } // scanString
446
447  /**
448   *
449   **/
450  void scanIncludedFile (IncludeEntry file, String filename, boolean includeIsImport) throws IOException
451  {
452    dataStack.push (data);
453    data = new ScannerData ();
454    data.indent = ((ScannerData)dataStack.peek ()).indent + ' ';
455    data.includeIsImport = includeIsImport;
456    try
457    {
458      readFile (file, filename);
459      if (!emitAll && includeIsImport)
460        SymtabEntry.enteringInclude ();
461      // <d56351> As of CORBA 2.3, include files define new scope for Repository
462      // ID prefixes. The previous Rep. ID is just below the top of the stack and
463      // must be restored when the contents of this include file are parsed (see readCh()).
464      Parser.enteringInclude ();
465
466      if (verbose)
467        System.out.println (data.indent + Util.getMessage ("Compile.parsing", filename));
468    }
469    catch (IOException e)
470    {
471      data = (ScannerData)dataStack.pop ();
472      throw e;
473    }
474  } // scanIncludedFile
475
476  /**
477   *
478   **/
479  private void unread (char ch)
480  {
481    if (ch == '\n' && !data.macrodata) --data.line;
482    --data.fileIndex;
483  } // unread
484
485  /**
486   *
487   **/
488  void readChar () throws IOException
489  {
490    if (data.fileIndex >= data.fileBytes.length)
491      if (dataStack.empty ())
492        throw new EOFException ();
493      else
494      {
495        // <d56351> Indicate end-of-scope for include file to parser.
496        //Parser.exitingInclude ();
497
498        // IBM.11666 - begin
499        //if (!emitAll && data.includeIsImport && !data.macrodata)
500        //{
501        //SymtabEntry.exitingInclude ();
502        //Parser.exitingInclude (); // <d59469>
503        //}
504        if (!data.macrodata)
505        {
506            if (!emitAll && data.includeIsImport)
507                SymtabEntry.exitingInclude();
508            Parser.exitingInclude();
509        } // IBM.11666 - end
510
511        if (verbose && !data.macrodata)
512          System.out.println (data.indent + Util.getMessage ("Compile.parseDone", data.filename));
513        data = (ScannerData)dataStack.pop ();
514      }
515    else
516    {
517      data.ch = (char)(data.fileBytes[data.fileIndex++] & 0x00ff);
518      if (data.ch == '\n' && !data.macrodata) ++data.line;
519    }
520  } // readChar
521
522  /**
523   * Starting at a quote, reads a string with possible
524   * unicode or octal values until an end quote.  Doesn't
525   * handle line feeds or comments.
526   */
527  private String getWString() throws IOException
528  {
529      readChar();
530      StringBuffer result = new StringBuffer();
531
532      while (data.ch != '"') {
533          if (data.ch == '\\') {
534              // Could be a \ooo octal or
535              // unicode hex
536              readChar();
537              if (data.ch == 'u') {
538                  // Unicode hex
539                  int num = getNDigitHexNumber(4);
540                  System.out.println("Got num: " + num);
541                  System.out.println("Which is: " + (int)(char)num);
542                  result.append((char)num);
543                  // result.append((char)getNDigitHexNumber(4));
544                  // getNDigitHexNumber reads the next
545                  // character, so loop without reading another
546                  continue;
547              } else
548              if (data.ch >= '0' && data.ch <= '7') {
549                  // Octal
550                  result.append((char)get3DigitOctalNumber());
551                  // get3DigitOctalNumber reads the next
552                  // character, so loop without reading another
553                  continue;
554              } else {
555                  // Wasn't either, so just append the
556                  // slash and current character.
557                  result.append('\\');
558                  result.append(data.ch);
559              }
560          } else {
561              // Just append the character
562              result.append(data.ch);
563          }
564
565          // Advance to the next character
566          readChar();
567      }
568
569      return result.toString();
570  }
571
572  /**
573   *
574   **/
575  private Token getCharacterToken(boolean isWide) throws IOException
576  {
577    // The token name returned contains a string with two elements:
578    // first the character appears, then the representation of the
579    // character.  These are typically the same, but they CAN be
580    // different, for example "O\117"
581    Token token = null;
582    readChar ();
583    if ( data.ch == '\\' )
584    {
585      readChar ();
586      if ((data.ch == 'x') || (data.ch == 'u'))
587      {
588        char charType = data.ch;
589        int hexNum = getNDigitHexNumber ((charType == 'x') ? 2 : 4);
590        return new Token (Token.CharacterLiteral,
591            ((char)hexNum) + "\\" + charType + Integer.toString (hexNum, 16), isWide );
592      }
593      if ((data.ch >= '0') && (data.ch <= '7'))
594      {
595        int octNum = get3DigitOctalNumber ();
596        return new Token (Token.CharacterLiteral,
597            ((char)octNum) + "\\" + Integer.toString (octNum, 8), isWide );
598      }
599      return singleCharEscapeSequence (isWide);
600    }
601    token = new Token (Token.CharacterLiteral, "" + data.ch + data.ch, isWide );
602    readChar ();
603    return token;
604  } // getCharacterToken
605
606  /**
607   *
608   **/
609  private Token singleCharEscapeSequence (boolean isWide) throws IOException
610  {
611    Token token;
612    if (data.ch == 'n')
613      // newline
614      token = new Token (Token.CharacterLiteral, "\n\\n", isWide);
615    else if (data.ch == 't')
616      // horizontal tab
617      token = new Token (Token.CharacterLiteral, "\t\\t", isWide);
618    else if (data.ch == 'v')
619      // vertical tab
620      token = new Token (Token.CharacterLiteral, "\013\\v", isWide);
621    else if (data.ch == 'b')
622      // backspace
623      token = new Token (Token.CharacterLiteral, "\b\\b", isWide);
624    else if (data.ch == 'r')
625      // carriage return
626      token = new Token (Token.CharacterLiteral, "\r\\r", isWide);
627    else if (data.ch == 'f')
628      // form feed
629      token = new Token (Token.CharacterLiteral, "\f\\f", isWide);
630    else if (data.ch == 'a')
631      // alert
632      token = new Token (Token.CharacterLiteral, "\007\\a", isWide);
633    else if (data.ch == '\\')
634      // backslash
635      token = new Token (Token.CharacterLiteral, "\\\\\\", isWide);
636    else if (data.ch == '?')
637      // question mark
638      token = new Token (Token.CharacterLiteral, "?\\?", isWide);
639    else if (data.ch == '\'')
640      // single quote
641      token = new Token (Token.CharacterLiteral, "'\\'", isWide);
642    else if (data.ch == '"')
643      // double quote
644      token = new Token (Token.CharacterLiteral, "\"\\\"", isWide);
645    else
646      throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);
647    readChar ();
648    return token;
649  } // singleCharEscapeSequence
650
651  private Token getString () throws IOException
652  {
653    StringBuffer sbuf = new StringBuffer() ;
654    boolean escaped = false;  // <d59166>
655    boolean[] collidesWithKeyword = { false } ;  // <d62023>
656
657    // <f46082.40> An escaped id. begins with '_', which is followed by a normal
658    // identifier.  Disallow prefixes of '_' having length > 1.
659    if (data.ch == '_') {
660        sbuf.append( data.ch ) ;
661        readChar ();
662        if (escaped = escapedOK)
663            if (data.ch == '_')
664                throw new InvalidCharacter (data.filename, currentLine (),
665                    currentLineNumber (), currentLinePosition (), data.ch);
666    }
667
668    // Build up the string of valid characters until a non-string
669    // character is encountered.
670    while (Character.isLetterOrDigit( data.ch ) || (data.ch == '_')) {
671        sbuf.append( data.ch ) ;
672        readChar() ;
673    }
674
675    String string = sbuf.toString() ;
676
677    // <f46082.40> Escaped identifiers - If identifier has '_' prefix, ignore
678    // keyword check and strip '_'; otherwise, perform keyword check.
679
680    if (!escaped) { // Escaped id ==> ignore keyword check
681        Token result = Token.makeKeywordToken( string, corbaLevel, escapedOK,
682            collidesWithKeyword ) ;
683        if (result != null)
684            return result ;
685    }
686
687    // At this point the string is an identifier.  If it is a
688    // string which is also a Java keyword, prepend an underscore
689    // so that it doesn't generate a compiler error.
690    string = getIdentifier (string);
691
692    // If a left paren immediately follows, this could be a
693    // macro definition, return a MacroIdentifier
694    if (data.ch == '(') {
695        readChar ();
696        return new Token (Token.MacroIdentifier, string, escaped,
697            collidesWithKeyword[0], false);
698    } else
699        return new Token (Token.Identifier, string, escaped,
700            collidesWithKeyword[0], false);
701  }
702
703  // Wildcard values
704  static final int Star = 0, Plus = 1, Dot = 2, None = 3;
705
706  /**
707   *
708   **/
709  private boolean matchesClosedWildKeyword (String string)
710  {
711    boolean     found     = true;
712    String      tmpString = string;
713    Enumeration e         = wildcardKeywords.elements ();
714    while (e.hasMoreElements ())
715    {
716      int             wildcard = None;
717      StringTokenizer tokens   = new StringTokenizer ((String)e.nextElement (), "*+.", true);
718      if (tokens.hasMoreTokens ())
719      {
720        String token = tokens.nextToken ();
721        if (tmpString.startsWith (token))
722        {
723          tmpString = tmpString.substring (token.length ());
724          while (tokens.hasMoreTokens () && found)
725          {
726            token = tokens.nextToken ();
727            if (token.equals ("*"))
728              wildcard = Star;
729            else if (token.equals ("+"))
730              wildcard = Plus;
731            else if (token.equals ("."))
732              wildcard = Dot;
733            else if (wildcard == Star)
734            {
735              int index = tmpString.indexOf (token);
736              if (index >= 0)
737                tmpString = tmpString.substring (index + token.length ());
738              else
739                found = false;
740            }
741            else if (wildcard == Plus)
742            {
743              int index = tmpString.indexOf (token);
744              if (index > 0)
745                tmpString = tmpString.substring (index + token.length ());
746              else
747                found = false;
748            }
749            else if (wildcard == Dot)
750            {
751              int index = tmpString.indexOf (token);
752              if (index == 1)
753                tmpString = tmpString.substring (1 + token.length ());
754              else
755                found = false;
756            }
757          }
758          if (found && tmpString.equals (""))
759            break;
760        }
761      }
762    }
763    return found && tmpString.equals ("");
764  } // matchesClosedWildKeyword
765
766  /**
767   *
768   **/
769  private String matchesOpenWildcard (String string)
770  {
771    Enumeration e = openEndedKeywords.elements ();
772    String prepend = "";
773    while (e.hasMoreElements ())
774    {
775      int             wildcard  = None;
776      boolean         found     = true;
777      String          tmpString = string;
778      StringTokenizer tokens    = new StringTokenizer ((String)e.nextElement (), "*+.", true);
779      while (tokens.hasMoreTokens () && found)
780      {
781        String token = tokens.nextToken ();
782        if (token.equals ("*"))
783          wildcard = Star;
784        else if (token.equals ("+"))
785          wildcard = Plus;
786        else if (token.equals ("."))
787          wildcard = Dot;
788        else if (wildcard == Star)
789        {
790          wildcard = None;
791          int index = tmpString.lastIndexOf (token);
792          if (index >= 0)
793            tmpString = blankOutMatch (tmpString, index, token.length ());
794          else
795            found = false;
796        }
797        else if (wildcard == Plus)
798        {
799          wildcard = None;
800          int index = tmpString.lastIndexOf (token);
801          if (index > 0)
802            tmpString = blankOutMatch (tmpString, index, token.length ());
803          else
804            found = false;
805        }
806        else if (wildcard == Dot)
807        {
808          wildcard = None;
809          int index = tmpString.lastIndexOf (token);
810          if (index == 1)
811            tmpString = blankOutMatch (tmpString, 1, token.length ());
812          else
813            found = false;
814        }
815        else if (wildcard == None)
816          if (tmpString.startsWith (token))
817            tmpString = blankOutMatch (tmpString, 0, token.length ());
818          else
819            found = false;
820      }
821
822      // Make sure that, if the last character of the keyword is a
823      // wildcard, that the string matches what the wildcard
824      // requires.
825      if (found)
826      {
827        if (wildcard == Star)
828          ;
829        else if (wildcard == Plus && tmpString.lastIndexOf (' ') != tmpString.length () - 1)
830          ;
831        else if (wildcard == Dot && tmpString.lastIndexOf (' ') == tmpString.length () - 2)
832          ;
833        else if (wildcard == None && tmpString.lastIndexOf (' ') == tmpString.length () - 1)
834          ;
835        else
836          found = false;
837      }
838      // If found, then prepend an underscore.  But also try matching
839      // again after leading and trailing blanks are removed from
840      // tmpString.  This isn't quite right, but it solves a problem
841      // which surfaced in the Java mapping.  For example:
842      // openEndedKeywords = {"+Helper", "+Holder", "+Package"};
843      // string            = fooHelperPackage.
844      // Given the mechanics of the Java mapping, _fooHelperPackage
845      // COULD have a conflict, so for each occurance of a keyword,
846      // an underscore is added, so this would cause two underscores:
847      // __fooHelperPackage.  To accomplish this, the first time thru
848      // tmpString is "fooHelper       " at this point, strip off the
849      // trailing blanks and try matching "fooHelper".  This also
850      // matches, so two underscores are prepended.
851      if (found)
852      {
853        prepend = prepend + "_" + matchesOpenWildcard (tmpString.trim ());
854        break;
855      }
856    }
857    return prepend;
858  } // matchesOpenWildcard
859
860  /**
861   *
862   **/
863  private String blankOutMatch (String string, int start, int length)
864  {
865    char[] blanks = new char [length];
866    for (int i = 0; i < length; ++i)
867      blanks[i] = ' ';
868    return string.substring (0, start) + new String (blanks) + string.substring (start + length);
869  } // blankOutMatch
870
871  /**
872   *
873   **/
874  private String getIdentifier (String string)
875  {
876    if (keywords.contains (string))
877      // string matches a non-wildcard keyword
878      string = '_' + string;
879    else
880    {
881      // Check to see if string matches any wildcard keywords that
882      // aren't open ended (don't have a wildcard as the first or
883      // last character.
884      String prepend = "";
885      if (matchesClosedWildKeyword (string))
886        prepend = "_";
887      else
888        // string did not match any closed wildcard keywords (that
889        // is, keywords with wildcards anywhere but at the beginning
890        // or end of the word).
891        // Now check for * + or . at the beginning or end.
892        // These require special handling because they could match
893        // more than one keyword.  prepend an underscore for each
894        // matched keyword.
895        prepend = matchesOpenWildcard (string);
896      string = prepend + string;
897    }
898    return string;
899  } // getIdentifier
900
901  /**
902   *
903   **/
904  private Token getDirective () throws IOException
905  {
906    readChar ();
907    String string = new String ();
908    while ((data.ch >= 'a' && data.ch <= 'z') || (data.ch >= 'A' && data.ch <= 'Z'))
909    {
910      string = string + data.ch;
911      readChar ();
912    }
913    unread (data.ch);
914    for (int i = 0; i < Token.Directives.length; ++i)
915      if (string.equals (Token.Directives[i]))
916        return new Token (Token.FirstDirective + i);
917    // If it got this far, it is an unknown preprocessor directive.
918    return new Token (Token.Unknown, string);
919  } // getDirective
920
921  /**
922   *
923   **/
924  private Token getNumber () throws IOException
925  {
926    if (data.ch == '.')
927      return getFractionNoInteger ();
928    else if (data.ch == '0')
929      return isItHex ();
930    else // the only other possibliities are 1..9
931      return getInteger ();
932  } // getNumber
933
934  /**
935   *
936   **/
937  private Token getFractionNoInteger () throws IOException
938  {
939    readChar ();
940    if (data.ch >= '0' && data.ch <= '9')
941      return getFraction (".");
942    else
943      return new Token (Token.Period);
944  } // getFractionNoInteger
945
946  /**
947   *
948   **/
949  private Token getFraction (String string) throws IOException
950  {
951    while (data.ch >= '0' && data.ch <= '9')
952    {
953      string = string + data.ch;
954      readChar ();
955    }
956    if (data.ch == 'e' || data.ch == 'E')
957      return getExponent (string + 'E');
958    else
959      return new Token (Token.FloatingPointLiteral, string);
960  } // getFraction
961
962  /**
963   *
964   **/
965  private Token getExponent (String string) throws IOException
966  {
967    readChar ();
968    if (data.ch == '+' || data.ch == '-')
969    {
970      string = string + data.ch;
971      readChar ();
972    }
973    else if (data.ch < '0' || data.ch > '9')
974      throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);
975    while (data.ch >= '0' && data.ch <= '9')
976    {
977      string = string + data.ch;
978      readChar ();
979    }
980    return new Token (Token.FloatingPointLiteral, string);
981  } // getExponent
982
983  /**
984   *
985   **/
986  private Token isItHex () throws IOException
987  {
988    readChar ();
989    if (data.ch == '.')
990    {
991      readChar ();
992      return getFraction ("0.");
993    }
994    else if (data.ch == 'x' || data.ch == 'X')
995      return getHexNumber ("0x");
996    else if (data.ch == '8' || data.ch == '9')
997      throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);
998    else if (data.ch >= '0' && data.ch <= '7')
999      return getOctalNumber ();
1000    else if (data.ch == 'e' || data.ch == 'E')
1001      return getExponent ("0E");
1002    else
1003      return new Token (Token.IntegerLiteral, "0");
1004  } // isItHex
1005
1006  /**
1007   *
1008   **/
1009  private Token getOctalNumber () throws IOException
1010  {
1011    String string = "0" + data.ch;
1012    readChar ();
1013    while ((data.ch >= '0' && data.ch <= '9'))
1014    {
1015      if (data.ch == '8' || data.ch == '9')
1016        throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);
1017      string = string + data.ch;
1018      readChar ();
1019    }
1020    return new Token (Token.IntegerLiteral, string);
1021  } // getOctalNumber
1022
1023  /**
1024   *
1025   **/
1026  private Token getHexNumber (String string) throws IOException
1027  {
1028    readChar ();
1029    if ((data.ch < '0' || data.ch > '9') && (data.ch < 'a' || data.ch > 'f') && (data.ch < 'A' || data.ch > 'F'))
1030      throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);
1031    else
1032      while ((data.ch >= '0' && data.ch <= '9') || (data.ch >= 'a' && data.ch <= 'f') || (data.ch >= 'A' && data.ch <= 'F'))
1033      {
1034        string = string + data.ch;
1035        readChar ();
1036      }
1037    return new Token (Token.IntegerLiteral, string);
1038  } // getHexNumber
1039
1040  /**
1041   *
1042   **/
1043  private int getNDigitHexNumber (int n) throws IOException
1044  {
1045    readChar ();
1046    if (!isHexChar (data.ch))
1047      throw new InvalidCharacter (data.filename, currentLine (),
1048          currentLineNumber (), currentLinePosition (), data.ch);
1049    String string = "" + data.ch;
1050    readChar ();
1051    for (int i = 2; i <= n; i++)
1052    {
1053      if (!isHexChar( data.ch))
1054        break;
1055      string += data.ch;
1056      readChar ();
1057    }
1058    try
1059    {
1060      return Integer.parseInt (string, 16);
1061    }
1062    catch (NumberFormatException e)
1063    {
1064    }
1065    return 0;
1066  } // getNDigitHexNumber
1067
1068  /**
1069   *
1070   **/
1071  private boolean isHexChar ( char hex )
1072  {
1073    return ((data.ch >= '0') && (data.ch <= '9')) ||
1074        ((data.ch >= 'a') && (data.ch <= 'f')) ||
1075        ((data.ch >= 'A') && (data.ch <= 'F'));
1076  }
1077
1078  /**
1079   *
1080   **/
1081  private int get3DigitOctalNumber () throws IOException
1082  {
1083    char firstDigit = data.ch;
1084    String string = "" + data.ch;
1085    readChar ();
1086    if (data.ch >= '0' && data.ch <= '7')
1087    {
1088      string = string + data.ch;
1089      readChar ();
1090      if (data.ch >= '0' && data.ch <= '7')
1091      {
1092        string = string + data.ch;
1093        if (firstDigit > '3')
1094          // This is a 3-digit number bigger than 377
1095          throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), firstDigit);
1096        readChar ();
1097      }
1098    }
1099    int ret = 0;
1100    try
1101    {
1102      ret = Integer.parseInt (string, 8);
1103    }
1104    catch (NumberFormatException e)
1105    {
1106      throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), string.charAt (0));
1107    }
1108    return ret;
1109  } // get3DigitOctalNumber
1110
1111  /**
1112   *
1113   **/
1114  private Token getInteger () throws IOException
1115  {
1116    String string = "" + data.ch;
1117    readChar ();
1118    if (data.ch == '.')
1119    {
1120      readChar ();
1121      return getFraction (string + '.');
1122    }
1123    else  if (data.ch == 'e' || data.ch == 'E')
1124      return getExponent (string + 'E');
1125    else if (data.ch >= '0' && data.ch <= '9')
1126      while (data.ch >= '0' && data.ch <= '9')
1127      {
1128        string = string + data.ch;
1129        readChar ();
1130        if (data.ch == '.')
1131        {
1132          readChar ();
1133          return getFraction (string + '.');
1134        }
1135      }
1136    return new Token (Token.IntegerLiteral, string);
1137  } // getInteger
1138
1139  /**
1140   *
1141   **/
1142  private Token replaceTrigraph () throws IOException
1143  {
1144    readChar ();
1145    if (data.ch == '?')
1146    {
1147      readChar ();
1148      if (data.ch == '=')
1149        data.ch = '#';
1150      else if (data.ch == '/')
1151        data.ch = '\\';
1152      else if (data.ch == '\'')
1153        data.ch = '^';
1154      else if (data.ch == '(')
1155        data.ch = '[';
1156      else if (data.ch == ')')
1157        data.ch = ']';
1158      else if (data.ch == '!')
1159        data.ch = '|';
1160      else if (data.ch == '<')
1161        data.ch = '{';
1162      else if (data.ch == '>')
1163        data.ch = '}';
1164      else if (data.ch == '-')
1165        data.ch = '~';
1166      else
1167      {
1168        unread (data.ch);
1169        unread ('?');
1170        throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);
1171      }
1172      return getToken ();
1173    }
1174    else
1175    {
1176      unread ('?');
1177      throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);
1178    }
1179  } // replaceTrigraph
1180
1181  /**
1182   *
1183   **/
1184  void skipWhiteSpace () throws IOException
1185  {
1186    while (data.ch <= ' ')
1187      readChar ();
1188  } // skipWhiteSpace
1189
1190  /**
1191   *
1192   **/
1193  private void skipBlockComment () throws IOException
1194  {
1195    try
1196    {
1197      boolean done = false;
1198      readChar ();
1199      while (!done)
1200      {
1201        while (data.ch != '*')
1202          readChar ();
1203        readChar ();
1204        if (data.ch == '/')
1205          done = true;
1206      }
1207    }
1208    catch (EOFException e)
1209    {
1210      ParseException.unclosedComment (data.filename);
1211      throw e;
1212    }
1213  } // skipBlockComment
1214
1215  /**
1216   *
1217   **/
1218  void skipLineComment () throws IOException
1219  {
1220    while (data.ch != '\n')
1221      readChar ();
1222  } // skipLineComment
1223
1224  // The following two routines added to extract comments rather
1225  // than ignore them.
1226
1227  /**
1228   * Extract a line comment from the input buffer.
1229   **/
1230  private String getLineComment () throws IOException
1231  {
1232    StringBuffer sb = new StringBuffer( "/" );
1233    while (data.ch != '\n')
1234    {
1235      if (data.ch != '\r')
1236        sb.append (data.ch);
1237      readChar ();
1238    }
1239    return sb.toString();
1240  } // getLineComment
1241
1242  /**
1243   * Extract a block comment from the input buffer.
1244   **/
1245  private String getBlockComment () throws IOException
1246  {
1247    StringBuffer sb = new StringBuffer ("/*");
1248    try
1249    {
1250      boolean done = false;
1251      readChar ();
1252      sb.append (data.ch);
1253      while (!done)
1254      {
1255        while (data.ch != '*')
1256        {
1257          readChar ();
1258          sb.append (data.ch);
1259        }
1260        readChar ();
1261        sb.append (data.ch);
1262        if (data.ch == '/')
1263          done = true;
1264      }
1265    }
1266    catch (EOFException e)
1267    {
1268      ParseException.unclosedComment (data.filename);
1269      throw e;
1270    }
1271    return sb.toString ();
1272  } // getBlockComment
1273
1274  /**
1275   *
1276   **/
1277  Token skipUntil (char c) throws IOException
1278  {
1279    while (data.ch != c)
1280    {
1281      if (data.ch == '/')
1282      {
1283        readChar ();
1284        if (data.ch == '/')
1285        {
1286          skipLineComment ();
1287          // If this is skipping until the newline, skipLineComment
1288          // reads past the newline, so it won't be seen by the
1289          // while loop conditional check.
1290          if (c == '\n') break;
1291        }
1292        else if (data.ch == '*')
1293          skipBlockComment ();
1294      }
1295      else
1296        readChar ();
1297    }
1298    return getToken ();
1299  } // skipUntil
1300
1301  // getUntil is used for macro definitions and to get quoted
1302  // strings, so characters within "("...")" and '"'...'"' are
1303  // ignored.  Ie getUntil ',' on (,,,,),X will return (,,,,)
1304
1305  String getUntil (char c) throws IOException
1306  {
1307      return getUntil (c, true, true, true);
1308  }
1309
1310  String getUntil (char c, boolean allowQuote, boolean allowCharLit, boolean allowComment) throws IOException
1311  {
1312    String string = "";
1313    while (data.ch != c)
1314      string = appendToString (string, allowQuote, allowCharLit, allowComment);
1315    return string;
1316  } // getUntil
1317
1318  /**
1319   *
1320   **/
1321  String getUntil (char c1, char c2) throws IOException
1322  {
1323    String string = "";
1324    while (data.ch != c1 && data.ch != c2)
1325      string = appendToString (string, false, false, false);
1326    return string;
1327  } // getUntil
1328
1329  /**
1330   *
1331   **/
1332  private String appendToString (String string, boolean allowQuote, boolean allowCharLit, boolean allowComment) throws IOException
1333  {
1334    // Ignore any comments if they are allowed
1335    if (allowComment && data.ch == '/')
1336    {
1337      readChar ();
1338      if (data.ch == '/')
1339        skipLineComment ();
1340      else if (data.ch == '*')
1341        skipBlockComment ();
1342      else
1343        string = string + '/';
1344    }
1345    // Handle line continuation character
1346    else if (data.ch == '\\')
1347    {
1348      readChar ();
1349      if (data.ch == '\n')
1350        readChar ();
1351      else if (data.ch == '\r')
1352      {
1353        readChar ();
1354        if (data.ch == '\n')
1355          readChar ();
1356      }
1357      else
1358      {
1359        string = string + '\\' + data.ch;
1360        readChar ();
1361      }
1362    }
1363    // characters within "("...")" and '"'...'"' are ignored.
1364    // Ie getUntil ',' on (,,,,),X will return (,,,)
1365    else
1366    {
1367      if (allowCharLit && data.ch == '"')
1368      {
1369        readChar ();
1370        string = string + '"';
1371        while (data.ch != '"')
1372          string = appendToString (string, true, false, allowComment);
1373      }
1374      else if (allowQuote && allowCharLit && data.ch == '(')
1375      {
1376        readChar ();
1377        string = string + '(';
1378        while (data.ch != ')')
1379          string = appendToString (string, false, false, allowComment);
1380      }
1381      else if (allowQuote && data.ch == '\'')
1382      {
1383        readChar ();
1384        string = string + "'";
1385        while (data.ch != '\'')
1386          string = appendToString (string, false, true, allowComment);
1387      }
1388      string = string + data.ch;
1389      readChar ();
1390    }
1391    return string;
1392  } // appendToString
1393
1394  /**
1395   *
1396   **/
1397  String getStringToEOL () throws IOException
1398  {
1399    String string = new String ();
1400    while (data.ch != '\n')
1401    {
1402      if (data.ch == '\\')
1403      {
1404        readChar ();
1405        if (data.ch == '\n')
1406          readChar ();
1407        else if (data.ch == '\r')
1408        {
1409          readChar ();
1410          if (data.ch == '\n')
1411            readChar ();
1412        }
1413        else
1414        {
1415          string = string + data.ch;
1416          readChar ();
1417        }
1418      }
1419      else
1420      {
1421        string = string + data.ch;
1422        readChar ();
1423      }
1424    }
1425    return string;
1426  } // getStringToEOL
1427
1428  /**
1429   *
1430   **/
1431  String filename ()
1432  {
1433    return data.filename;
1434  } // filename
1435
1436  /**
1437   *
1438   **/
1439  IncludeEntry fileEntry ()
1440  {
1441    return data.fileEntry;
1442  } // fileEntry
1443
1444  /**
1445   *
1446   **/
1447  int currentLineNumber ()
1448  {
1449    return data.line;
1450  } // currentLineNumber
1451
1452  /**
1453   *
1454   **/
1455  int lastTokenLineNumber ()
1456  {
1457    return data.oldLine;
1458  } // lastTokenLineNumber
1459
1460  private int BOL; // Beginning Of Line
1461
1462  /**
1463   *
1464   **/
1465  String currentLine ()
1466  {
1467    BOL = data.fileIndex - 1;
1468    try
1469    {
1470      // If the current position is at the end of the line,
1471      // set BOL to before the end of the line so the whole
1472      // line is returned.
1473      if (data.fileBytes[BOL - 1] == '\r' && data.fileBytes[BOL] == '\n')
1474        BOL -= 2;
1475      else if (data.fileBytes[BOL] == '\n')
1476        --BOL;
1477      while (data.fileBytes[BOL] != '\n')
1478        --BOL;
1479    }
1480    catch (ArrayIndexOutOfBoundsException e)
1481    {
1482      BOL = -1;
1483    }
1484    ++BOL; // Go to the first character AFTER the newline
1485    int EOL = data.fileIndex - 1;
1486    try
1487    {
1488      while (data.fileBytes[EOL] != '\n' && data.fileBytes[EOL] != '\r')
1489        ++EOL;
1490    }
1491    catch (ArrayIndexOutOfBoundsException e)
1492    {
1493      EOL = data.fileBytes.length;
1494    }
1495    if (BOL < EOL)
1496      return new String (data.fileBytes, BOL, EOL - BOL);
1497    else
1498      return "";
1499  } // currentLine
1500
1501  /**
1502   *
1503   **/
1504  String lastTokenLine ()
1505  {
1506    int saveFileIndex = data.fileIndex;
1507    data.fileIndex = data.oldIndex;
1508    String ret = currentLine ();
1509    data.fileIndex = saveFileIndex;
1510    return ret;
1511  } // lastTokenLine
1512
1513  /**
1514   *
1515   **/
1516  int currentLinePosition ()
1517  {
1518    return data.fileIndex - BOL;
1519  } // currentLinePosition
1520
1521  /**
1522   *
1523   **/
1524  int lastTokenLinePosition ()
1525  {
1526    return data.oldIndex - BOL;
1527  } // lastTokenLinePosition
1528
1529  // The scanner data is moved to a separate class so that all of the
1530  // data can easily be pushed and popped to a stack.
1531
1532  // The data must be stackable for macros and #included files.  When
1533  // a macro is encountered:  the current stack data is reserved on
1534  // the stack; the stack is loaded with the macro info; processing
1535  // proceeds with this data.  The same is true for #included files.
1536
1537  // It may seem that the entire Scanner should be put on a stack in
1538  // the Parser since all the scanner data is stackable.  But that
1539  // would mean instantiating a new scanner.  The scanner must
1540  // continue from where it left off; when certain things cross file
1541  // boundaries, they must be handled by the scanner, not the parser,
1542  // things like:  block comments, quoted strings, tokens.
1543  private ScannerData data              = new ScannerData ();
1544  private Stack       dataStack         = new Stack ();
1545  private Vector      keywords          = new Vector ();
1546  private Vector      openEndedKeywords = new Vector ();
1547  private Vector      wildcardKeywords  = new Vector ();
1548  private boolean     verbose;
1549  // <f46082.40> Identifiers starting with '_' are considered "Escaped",
1550  // except when scanned during preprocessing.  Class Preprocessor is
1551  // responsible to modify the escapedOK flag accordingly.  Since preceding
1552  // underscores are now legal when scanning identifiers as well as
1553  // macro identifier, underscoreOK is obsolete.
1554  //
1555  //        boolean     underscoreOK      = false;
1556          boolean     escapedOK         = true;
1557  // <f46082.51> Remove -stateful feature.
1558  //        boolean     stateful;
1559  private boolean     emitAll;
1560  private float       corbaLevel;
1561  private boolean     debug ;
1562} // class Scanner
1563
1564// This is a dumb class, really just a struct.  It contains all of the
1565// scanner class's data in one place so that that data can be easily
1566// pushed and popped to a stack.
1567
1568/**
1569 *
1570 **/
1571class ScannerData
1572{
1573  /**
1574   *
1575   **/
1576  public ScannerData ()
1577  {
1578  } // ctor
1579
1580  /**
1581   *
1582   **/
1583  public ScannerData (ScannerData that)
1584  {
1585    indent          = that.indent;
1586    fileEntry       = that.fileEntry;
1587    filename        = that.filename;
1588    fileBytes       = that.fileBytes;
1589    fileIndex       = that.fileIndex;
1590    oldIndex        = that.oldIndex;
1591    ch              = that.ch;
1592    line            = that.line;
1593    oldLine         = that.oldLine;
1594    macrodata       = that.macrodata;
1595    includeIsImport = that.includeIsImport;
1596  } // copy ctor
1597
1598  String       indent          = "";
1599  IncludeEntry fileEntry       = null;
1600  String       filename        = "";
1601
1602  // fileBytes is a byte array rather than a char array.  This is
1603  // safe because OMG IDL is specified to be ISO Latin-1 whose high-
1604  // order byte is always 0x0.  <f49747.1> Converted from byte[] to char[]
1605  // to employ Reader classes, which have Character encoding features. <ajb>
1606  //byte[]       fileBytes       = null;
1607  char[]       fileBytes       = null;
1608  int          fileIndex       = 0;
1609  int          oldIndex        = 0;
1610  char         ch;
1611  int          line            = 1;
1612  int          oldLine         = 1;
1613  boolean      macrodata       = false;
1614  boolean      includeIsImport = false;
1615} // class ScannerData
1616