Token.java revision 608:7e06bf1dcb09
1/*
2 * Copyright (c) 1999, 2001, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25/*
26 * COMPONENT_NAME: idl.parser
27 *
28 * ORIGINS: 27
29 *
30 * Licensed Materials - Property of IBM
31 * 5639-D57 (C) COPYRIGHT International Business Machines Corp. 1997, 1999
32 * RMI-IIOP v1.0
33 *
34 */
35
36package com.sun.tools.corba.se.idl;
37
// NOTES:
// -F46082.51<daz> Remove -stateful feature.
// -D59166<daz> Add support for keyword/identifier collision detection.  This
//  feature is implemented here, rather than in class Scanner, to allow the
//  Parser to handle the problem.
// -F60858.1<daz> Support -corba option, level <= 2.2: identify 2.3 keywords.
// -D62023<daz> Support -corba option, level <= 2.3: identify 2.4 keywords.
// KMC Support -corba, level <= 3.0.  Added 3.0 keywords.
//
// Should an escaped Identifier be a type rather than an attribute?
//
49
50/**
51 * Class Token represents a lexeme appearing within an IDL source.  Every
52 * Token has a type.  Depending on its type and on the supported version
53 * of IDL, a Token will have other associated attributes, such as a name
54 * (identifier, e.g.), and whether it is escaped, deprecated, or is a type
55 * that is known to be in a future version of IDL.
56 **/
57class Token
58{
59  ///////////////
60  // Available types
61
62  static final int                // Keywords
63      Any                  =   0, // 2.2
64      Attribute            =   1, // |
65      Boolean              =   2, // .
66      Case                 =   3, // .
67      Char                 =   4, // .
68      Const                =   5,
69      Context              =   6,
70      Default              =   7,
71      Double               =   8,
72      Enum                 =   9,
73      Exception            =  10,
74      FALSE                =  11,
75      Fixed                =  12, // New addition
76      Float                =  13,
77      In                   =  14,
78      Inout                =  15,
79      Interface            =  16,
80      Long                 =  17,
81      Module               =  18,
82      Native               =  19, // New addition
83      Object               =  20,
84      Octet                =  21,
85      Oneway               =  22,
86      Out                  =  23,
87      Raises               =  24,
88      Readonly             =  25,
89      Sequence             =  26,
90      Short                =  27,
91      String               =  28,
92      Struct               =  29,
93      Switch               =  30,
94      TRUE                 =  31,
95      Typedef              =  32,
96      Unsigned             =  33, // .
97      Union                =  34, // .
98      Void                 =  35, // .
99      Wchar                =  36, // |
100      Wstring              =  37, // 2.2
101      // <f46082.40> New OBV keywords...
102      // <d62023> In 2.4rtf, "factory" is synonymous to "init" in 2.3
103      Init                 =  38, // 2.3 only
104      Abstract             =  39, // 2.3        2.4rtf
105      Custom               =  40, // |          |
106      Private              =  41, // |          |
107      Public               =  42, // |          |
108      Supports             =  43, // |          |
109      Truncatable          =  44, // |          |
110      ValueBase            =  45, // |          |
111      Valuetype            =  46, // 2.3        2.4rtf
112      Factory              =  47, //            2.4rtf only
113
114      // Keywords in CORBA 3.0
115      Component            =  48,
116      Consumes             =  49,
117      Emits                =  50,
118      Finder               =  51,
119      GetRaises            =  52,
120      Home                 =  53,
121      Import               =  54,
122      Local                =  55,
123      Manages              =  56,
124      Multiple             =  57,
125      PrimaryKey           =  58,
126      Provides             =  59,
127      Publishes            =  60,
128      SetRaises            =  61,
129      TypeId               =  62,
130      TypePrefix           =  63,
131      Uses                 =  64,
132
133      Identifier           =  80, // Identifier
134      MacroIdentifier      =  81, // Macro Identifier
135
136      Semicolon            = 100, // Symbols
137      LeftBrace            = 101,
138      RightBrace           = 102,
139      Colon                = 103,
140      Comma                = 104,
141      Equal                = 105,
142      Plus                 = 106,
143      Minus                = 107,
144      LeftParen            = 108,
145      RightParen           = 109,
146      LessThan             = 110,
147      GreaterThan          = 111,
148      LeftBracket          = 112,
149      RightBracket         = 113,
150      Apostrophe           = 114,
151      Quote                = 115,
152      Backslash            = 116,
153      Bar                  = 117,
154      Carat                = 118,
155      Ampersand            = 119,
156      Star                 = 120,
157      Slash                = 121,
158      Percent              = 122,
159      Tilde                = 123,
160      DoubleColon          = 124,
161      ShiftLeft            = 125,
162      ShiftRight           = 126,
163      Period               = 127,
164      Hash                 = 128,
165      Exclamation          = 129,
166      DoubleEqual          = 130,
167      NotEqual             = 131,
168      GreaterEqual         = 132,
169      LessEqual            = 133,
170      DoubleBar            = 134,
171      DoubleAmpersand      = 135,
172
173      BooleanLiteral       = 200, // Literals
174      CharacterLiteral     = 201,
175      IntegerLiteral       = 202,
176      FloatingPointLiteral = 203,
177      StringLiteral        = 204,
178      Literal              = 205,
179
180      Define               = 300, // Directives
181      Undef                = 301,
182      If                   = 302,
183      Ifdef                = 303,
184      Ifndef               = 304,
185      Else                 = 305,
186      Elif                 = 306,
187      Include              = 307,
188      Endif                = 308,
189      Line                 = 309,
190      Error                = 310,
191      Pragma               = 311,
192      Null                 = 312,
193      Unknown              = 313,
194
195      Defined              = 400,
196
197      // <f46082.40> Keyword identifiers.
198      //Abstract             = 500,
199      //Custom               = 501,
200      //Init                 = 502,
201      //Private2             = 503,
202      //Public2              = 504,
203      //Supports             = 505,
204      //Truncatable          = 506,
205      //ValueBase            = 507,
206      //Valuetype            = 508,
207
208      EOF                  = 999; // End of Input
209
210  // Available types
211  ///////////////
212  // Keywords
213
214  static final String [] Keywords = {
215      "any",         "attribute",    "boolean",
216      "case",        "char",         "const",
217      "context",     "default",      "double",
218      "enum",        "exception",    "FALSE",      "fixed",
219      "float",       "in",           "inout",
220      "interface",   "long",         "module",     "native",
221      "Object",      "octet",        "oneway",
222      "out",         "raises",       "readonly",
223      "sequence",    "short",        "string",
224      "struct",      "switch",       "TRUE",
225      "typedef",     "unsigned",     "union",
226      "void",        "wchar",        "wstring",
227      "init", // In 2.3 only
228      "abstract",     "custom",      "private",      // 2.3 and 2.4rtf
229      "public",       "supports",    "truncatable",
230      "ValueBase",    "valuetype",
231      "factory",  // In 2.4rtf only
232      // CORBA 3.0 keywords
233      "component",      "consumes",     "emits",
234      "finder",         "getRaises",    "home",
235      "import",         "local",        "manages",
236      "multiple",       "primaryKey",   "provides",
237      "publishes",      "setRaises",    "supports",
238      "typeId",         "typePrefix",   "uses" } ;
239
240  // <f46082.40> Remove keyword identifiers.
241  //static final int
242  //    FirstKeywordIdentifier = 500,
243  //    LastKeywordIdentifier  = Valuetype;
244  //
245  //static final String[] KeywordIdentifiers = {
246  //    "abstract",    "custom",    "init",
247  //    "private",     "public",    "supports",
248  //    "truncatable", "valueBase", "valuetype"};
249
250  /**
251   * Determine whether this token is a keyword.
252   * @return true iff this token is a keyword.
253   **/
254  boolean isKeyword ()
255  {
256    return type >= FirstKeyword && type <= LastKeyword;
257  } // isKeyword
258
259  private static final int
260      FirstKeyword = Any, // 0
261      LastKeyword  = Uses;
262
263  // <f60858.1> Keywords in CORBA 2.2 that we support.
264  private static final int
265      First22Keyword = Any, // 0
266      Last22Keyword  = Wstring;
267
268  // <f60858.1> New keywords in CORBA 2.3 (preliminary) that we support.
269  private static final int
270      First23Keyword = Init,
271      Last23Keyword  = Valuetype;
272
273  // <d62023> New keywords in CORBA 2.4rtf (accepted 2.3) that we support.
274  // Note that "factory" replaces "init".  Scanner must account for this in
275  // keyword scan.
276  private static final int
277      First24rtfKeyword = Abstract,
278      Last24rtfKeyword  = Factory;
279
280  // New keywords in CORBA 3.0 (from CORBA components v. 1)
281  private static final int
282      First30Keyword    = Component,
283      Last30Keyword     = Uses;
284
285  // Current valid CORBA levels:
286  // 2.2 (or <2.3): the default: no OBV support
287  // 2.3: add OBV with init
288  // >2.3: OBV with init replcaed by factory
289  // 3.0: adds components, attr exceptions, local interfaces, type repository
290  //      decls.
291
292  private static final int CORBA_LEVEL_22 = 0 ;
293  private static final int CORBA_LEVEL_23 = 1 ;
294  private static final int CORBA_LEVEL_24RTF = 2 ;
295  private static final int CORBA_LEVEL_30 = 3 ;
296
297  // Do the conversion from a floating point CORBA level to an int
298  private static int getLevel( float cLevel )
299  {
300    if (cLevel < 2.3f)
301        return CORBA_LEVEL_22 ;
302    if (Util.absDelta( cLevel, 2.3f ) < 0.001f)
303        return CORBA_LEVEL_23 ;
304    if (cLevel < 3.0f)
305        return CORBA_LEVEL_24RTF ;
306    return CORBA_LEVEL_30 ;
307  }
308
309  // Return the last keyword corresponding to a particular CORBA level
310  private static int getLastKeyword( int level )
311  {
312    if (level == CORBA_LEVEL_22)
313        return Last22Keyword ;
314    if (level == CORBA_LEVEL_23)
315        return Last23Keyword ;
316    if (level == CORBA_LEVEL_24RTF)
317        return Last24rtfKeyword ;
318    return Last30Keyword ;
319  }
320
321  /** Create a keyword token from a string.
322  * Determines whether the string is an IDL keyword based on the corbaLevel.
323  * Strings that are keywords at higher CORBA levels than the corbaLevel
324  * argument create identifier tokens that are marked as "collidesWithKeyword", unless
325  * escapedOK is FALSE, which is the case only when preprocessing is taking place.
326  * In the case of the "init" keyword, which was only defined in CORBA 2.3, init is
327  * marked deprecated in CORBA 2.3 since it is not supported in higher levels.
328  * @param String string The string we are converting to a token.
329  * @param float corbaLevel The CORBA level, currently in the interval [2.2, 3.0].
330  * @param boolean escapedOK Flag set true if _ is used to escape an IDL keyword for use
331  * as an identifier.
332  * @param boolean[] collidesWithKeyword is an array containing one value: a flag
333  * representing whether this string is an identifier that collides with a keyword.
334  * This is set by this method.
335  * @returns Token The resulting Token corresponding to string.
336  */
337  public static Token makeKeywordToken(
338    String string, float corbaLevel, boolean escapedOK, boolean[] collision )
339  {
340    int level = getLevel( corbaLevel ) ;
341    int lastKeyword = getLastKeyword( level ) ;
342    boolean deprecated = false ;
343    collision[0] = false ;
344
345    // If the string is a keyword token, return that token
346    for (int i = Token.FirstKeyword; i <= Token.LastKeyword; ++i) {
347        if (string.equals (Token.Keywords[i])) {
348            // <f60858.1><d62023> Return identifier if lexeme is a keyword in a
349            // greater CORBA level; collect attributes indicating future keyword/
350            // identifier collision and deprecations.
351
352            // Init is really a funny case.  I don't want to mark it as
353            // a keyword collision in the 2.2 case, since it was only
354            // defined to be a keyword briefly in 2.3.
355            if (i == Token.Init) {
356                if (level == CORBA_LEVEL_23)
357                    deprecated = true ;
358                else
359                    break ;
360            }
361
362            if (i > lastKeyword) {
363                collision[0] |= escapedOK; // escapedOK true iff not preprocessing
364                break ;
365            }
366
367            if (string.equals ("TRUE") || string.equals ("FALSE"))
368                return new Token (Token.BooleanLiteral, string) ;
369            else
370                return new Token (i, deprecated);
371        } else if (string.equalsIgnoreCase (Token.Keywords[i])) {
372            // <d62023> PU!  This will go away in a future release, because
373            // case-insensitive keyword checking will be standard.  For now,
374            // indicate that a keyword collision has occurred.
375            collision[0] |= true;
376            break;
377        }
378    } // for i <= lastKeyword
379
380    return null ;
381  } // makeKeywordToken
382
383  // Keywords
384  ///////////////
385  // Symbols
386
387  static final int
388      FirstSymbol = 100,
389      LastSymbol  = 199;
390
391  static final String [] Symbols = {
392      ";",  "{",  "}",  ":", ",", "=", "+",  "-",
393      "(",  ")",  "<",  ">", "[", "]", "'",  "\"",
394      "\\", "|",  "^",  "&", "*", "/", "%",  "~",
395      "::", "<<", ">>", ".", "#", "!", "==", "!=",
396      ">=", "<=", "||", "&&"};
397
398  // Symbols
399  ///////////////
400  // Literals
401
402  static final int
403      FirstLiteral = 200,
404      LastLiteral  = 299;
405
406  static final String [] Literals = {
407      Util.getMessage ("Token.boolLit"),
408      Util.getMessage ("Token.charLit"),
409      Util.getMessage ("Token.intLit"),
410      Util.getMessage ("Token.floatLit"),
411      Util.getMessage ("Token.stringLit"),
412      Util.getMessage ("Token.literal")};
413
414  // Literals
415  ///////////////
416  // Directives
417
418  /**
419   * Determine whether this token is a preprocessor directive.
420   * @return true iff this token is a preprocessor directive.
421   **/
422  boolean isDirective ()
423  {
424    return type >= FirstDirective && type <= LastDirective;
425  } // isDirective
426
427  static final int
428      FirstDirective = 300,
429      LastDirective  = 399;
430
431  static final String [] Directives = {
432      "define", "undef",  "if",
433      "ifdef",  "ifndef", "else",
434      "elif",   "include","endif",
435      "line",   "error",  "pragma",
436      ""};
437
438  // Directives
439  ///////////////
440  // Specials
441
442  static final int
443      FirstSpecial = 400,
444      LastSpecial  = 499;
445
446  static final String [] Special = {
447      "defined"};
448
449  // Specials
450  ///////////////
451
452  /**
453   * Constructor.
454   * @return a Token of the supplied type.
455   **/
456  Token (int tokenType)
457  {
458    type = tokenType;
459  } // ctor
460
461  // <d62023>
462  /**
463   * Constructor.
464   * @return a Token having the supplied attributes.
465   **/
466  Token (int tokenType, boolean deprecated)
467  {
468    this.type = tokenType;
469    this.isDeprecated = deprecated;
470  } // ctor
471
472  /**
473   * Constructor.
474   * @return a Token having the supplied attributes.
475   **/
476  Token (int tokenType, String tokenName)
477  {
478    type = tokenType;
479    name = tokenName;
480  } // ctor
481
482  /**
483   * Constructor.
484   * @return a Token having the supplied attribtues.
485   *  having
486   **/
487  Token (int tokenType, String tokenName, boolean isWide)
488  {
489    this (tokenType, tokenName);
490    this.isWide = isWide;
491  } // ctor
492
493
494  // <d62023>
495  /**
496   * Constructor.
497   * @return a Token having the supplied attributes.
498   **/
499  Token (int tokenType, String tokenName, boolean escaped,
500      boolean collision, boolean deprecated)
501  {
502    this (tokenType, tokenName);
503    this.isEscaped = escaped;
504    this.collidesWithKeyword = collision;
505    this.isDeprecated = deprecated;
506  } // ctor
507
508  // <f46082.40> Remove keyword identifiers.
509  ///**
510  // * Constructor.
511  // * @return a Token having the supplied attributes.
512  // **/
513  //Token (int tokenType, int tokenSubType, String tokenName)
514  //{
515  //  type    = tokenType;
516  //  subType = tokenSubType;
517  //  name    = tokenName;
518  //} // ctor
519
520  /**
521   * Get the String representation of this Token.
522   * @return a String containing representation of this Token.
523   **/
524  public String toString ()
525  {
526    if (type == Identifier)
527      return name;
528    if (type == MacroIdentifier)
529      return name + '(';
530    return Token.toString (type);
531  } // toString
532
533  /**
534   * Get the String representation of a supplied Token type.
535   * @return A String containing the name of the supplied Token type.
536   **/
537  static String toString (int type)
538  {
539    if (type <= LastKeyword)
540      return Keywords[type];
541    // <f46082.40> Remove keyword identifiers.
542    //if ( (type >= FirstKeywordIdentifier) && (type <= LastKeywordIdentifier) )
543    //  return KeywordIdentifiers[ type - FirstKeywordIdentifier ];
544    if (type == Identifier || type == MacroIdentifier)
545      return Util.getMessage ("Token.identifier");
546    if (type <= LastSymbol)
547      return Symbols[type - FirstSymbol];
548    if (type <= LastLiteral)
549      return Literals[type - FirstLiteral];
550    if (type <= LastDirective)
551      return Directives[type - FirstDirective];
552    if (type <= LastSpecial)
553      return Special[type - FirstSpecial];
554    if (type == EOF)
555      return Util.getMessage ("Token.endOfFile");
556    return Util.getMessage ("Token.unknown");
557  } // toString
558
559  ///////////////
560  // Accessors and Predicates
561
562  /**
563   * Determine whether this token equals a supplied token.
564   * @return true iff the types and names of this and the supplied
565   * Token are equal.
566   **/
567  boolean equals (Token that)
568  {
569    if (this.type == that.type)
570      if (this.name == null)
571        return that.name == null;
572      else
573        return this.name.equals (that.name);
574    return false;
575  } // equals
576
577  /**
578   * Determine whether the this token is of a supplied type.
579   * @return true iff the type of this Token equals that supplied.
580   **/
581  boolean equals (int type)
582  {
583    return this.type == type;
584  } // equals
585
586  /**
587   * Determine whether this identifier has the supplied name.
588   * @return true iff this Token is an identifier having the supplied name.
589   **/
590  boolean equals (String name)
591  {
592    return (this.type == Identifier && this.name.equals (name));
593  } // equals
594
595  // Although isEscaped is an independent attribute, it may be true only
596  // when type is Identifer.
597  /**
598   * Accessor.
599   * @return true iff this token is an escaped identifier.
600   **/
601  public boolean isEscaped ()
602  {
603    return type == Identifier && isEscaped;
604  } // isEscaped
605
606  // <d62023>
607  /**
608   * Accessor.
609   * @return true iff this token is an identifier having a name matching
610   * a keyword in a version of CORBA greater than the specified CORBA level,
611   * or iff it matches a keyword in letter, but note case.
612   **/
613  public boolean collidesWithKeyword ()
614  {
615    return collidesWithKeyword;
616  } // collidesWithKeyword
617
618  // <d62023> Storing deprecation information in a token seems a natural
619  // means to notify the parser about deprecated types.
620  /**
621   * Accessor.
622   * @return true iff this token is a deprecated lexeme or lexical type with
623   * respect to the specified CORBA level.
624   **/
625  public boolean isDeprecated ()
626  {
627    return isDeprecated;
628  }
629  // isDeprecated
630
631  public boolean isWide()
632  {
633      return isWide ;
634  }
635
636  // <d59166><d62023> It's more efficient if Scanner determines this attribute.
637  /**
638   * Determine whether this token collides with an IDL keyword.
639   **/
640  //public boolean collidesWithKeyword ()
641  //{
642  //  if (name != null && type == Identifier && !isEscaped)
643  //  {
644  //    String lcName = name.toLowerCase ();
645  //    for (int i = FirstKeyword; i <= LastKeyword; ++i)
646  //      if (lcName.equals (Token.Keywords [i].toLowerCase ()))
647  //        return true;
648  //  }
649  //  return false;
650  //} // collidesWithKeyword
651
652  // Accessors and Predicates
653  ///////////////
654
655  /**
656   * Code identifying the lexical class to which this token belongs, e.g.,
657   * Keyword, Identifier, ...
658   **/
659  int type;
660  /**
661   * Lexeme extracted from the source for this token.
662   **/
663  String name = null;
664  /**
665   * Source comment associated with this token.
666   **/
667  Comment comment = null;
668  /**
669   * True iff this token is an escaped identifier.
670   **/
671  boolean isEscaped = false; // <d59165>
672  /**
673   * True iff this token is an identifier that is known to be a keyword
674   * in another version of CORBA or matches a keyword in letter, but not case.
675   **/
676  boolean collidesWithKeyword = false;  // <d62023>
677  /**
678   * True iff this token is deprecated.
679   **/
680  boolean isDeprecated = false;  // <d62023>
681  // <f46082.40> Remove keyword identifier implementation.
682  ///**
683  // * Non-zero only when type = [Macro]Identifier
684  // **/
685  //int subType = 0;
686
687  boolean isWide = false ;  // Only for string and char literals: indicates that this is
688                            // a wide string or char.
689} // class Token
690