1/* 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996-2009 Oracle. All rights reserved. 5 * 6 */ 7 8/* 9 * Most of this lexical analyzer code is taken directly from sqlite source. 10 */ 11#include <ctype.h> 12#include <stdlib.h> 13#include "db_sql.h" 14 15/* 16** The charMap() macro maps alphabetic characters into their 17** lower-case ASCII equivalent. On ASCII machines, this is just 18** an upper-to-lower case map. On EBCDIC machines we also need 19** to adjust the encoding. Only alphabetic characters and underscores 20** need to be translated. 21*/ 22#ifdef SQLITE_ASCII 23# define charMap(X) sqlite3UpperToLower[(unsigned char)X] 24#endif 25#ifdef SQLITE_EBCDIC 26# define charMap(X) ebcdicToAscii[(unsigned char)X] 27const unsigned char ebcdicToAscii[] = { 28/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ 29 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ 30 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ 31 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ 32 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ 33 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ 34 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ 35 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, /* 6x */ 36 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 37 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* 8x */ 38 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* 9x */ 39 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ax */ 40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 41 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* Cx */ 42 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* Dx */ 43 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ex */ 44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Fx */ 45}; 46#endif 47 48/* 49** The sqlite3KeywordCode function looks up an identifier to determine if 50** it is a keyword. If it is a keyword, the token code of that keyword is 51** returned. If the input is not a keyword, TK_ID is returned. 52** 53** The implementation of this routine was generated by a program, 54** mkkeywordhash.h, located in the tool subdirectory of the distribution. 55** The output of the mkkeywordhash.c program is written into a file 56** named keywordhash.h and then included into this source file by 57** the #include below. 58*/ 59#include "sqlite/keywordhash.h" 60 61 62/* 63** If X is a character that can be used in an identifier then 64** IdChar(X) will be true. Otherwise it is false. 65** 66** For ASCII, any character with the high-order bit set is 67** allowed in an identifier. For 7-bit characters, 68** sqlite3IsIdChar[X] must be 1. 69** 70** For EBCDIC, the rules are more complex but have the same 71** end result. 72** 73** Ticket #1066. the SQL standard does not allow '$' in the 74** middle of identfiers. But many SQL implementations do. 75** SQLite will allow '$' in identifiers for compatibility. 76** But the feature is undocumented. 77*/ 78#ifdef SQLITE_ASCII 79const char sqlite3IsAsciiIdChar[] = { 80/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ 81 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ 82 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ 83 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ 84 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ 85 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ 86 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ 87}; 88#define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && sqlite3IsAsciiIdChar[c-0x20])) 89#endif 90#ifdef SQLITE_EBCDIC 91const char sqlite3IsEbcdicIdChar[] = { 92/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ 93 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 4x */ 94 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, /* 5x */ 95 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 6x */ 96 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 97 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, /* 8x */ 98 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, /* 9x */ 99 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, /* Ax */ 100 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 101 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Cx */ 102 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Dx */ 103 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Ex */ 104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, /* Fx */ 105}; 106#define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) 107#endif 108 109 110/* 111** Return the length of the token that begins at z[0]. 112** Store the token type in *tokenType before returning. 113*/ 114static int getToken(const unsigned char *z, int *tokenType){ 115 int i, c; 116 switch( *z ){ 117 case ' ': case '\t': case '\n': case '\f': case '\r': { 118 for(i=1; isspace(z[i]); i++){} 119 *tokenType = TK_SPACE; 120 return i; 121 } 122 case '-': { 123 if( z[1]=='-' ){ 124 for(i=2; (c=z[i])!=0 && c!='\n'; i++){} 125 *tokenType = TK_COMMENT; 126 return i; 127 } 128 *tokenType = TK_MINUS; 129 return 1; 130 } 131 case '(': { 132 *tokenType = TK_LP; 133 return 1; 134 } 135 case ')': { 136 *tokenType = TK_RP; 137 return 1; 138 } 139 case ';': { 140 *tokenType = TK_SEMI; 141 return 1; 142 } 143 case '+': { 144 *tokenType = TK_PLUS; 145 return 1; 146 } 147 case '*': { 148 *tokenType = TK_STAR; 149 return 1; 150 } 151 case '/': { 152 if( z[1]!='*' || z[2]==0 ){ 153 *tokenType = TK_SLASH; 154 return 1; 155 } 156 for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} 157 if( c ) i++; 158 *tokenType = TK_COMMENT; 159 return i; 160 } 161 case '%': { 162 *tokenType = TK_REM; 163 return 1; 164 } 165 case '=': { 166 *tokenType = TK_EQ; 167 return 1 + (z[1]=='='); 168 } 169 case '<': { 170 if( (c=z[1])=='=' ){ 171 *tokenType = TK_LE; 172 return 2; 173 }else if( c=='>' ){ 174 *tokenType = TK_NE; 175 return 2; 176 }else if( c=='<' ){ 177 *tokenType = TK_LSHIFT; 178 return 2; 179 }else{ 180 *tokenType = TK_LT; 181 return 1; 182 } 183 } 184 case '>': { 185 if( (c=z[1])=='=' ){ 186 *tokenType = TK_GE; 187 return 2; 188 }else if( c=='>' ){ 189 *tokenType = TK_RSHIFT; 190 return 2; 191 }else{ 192 *tokenType = TK_GT; 193 return 1; 194 } 195 } 196 case '!': { 197 if( z[1]!='=' ){ 198 *tokenType = TK_ILLEGAL; 199 return 2; 200 }else{ 201 *tokenType = TK_NE; 202 return 2; 203 } 204 } 205 case '|': { 206 if( z[1]!='|' ){ 207 *tokenType = TK_BITOR; 208 return 1; 209 }else{ 210 *tokenType = TK_CONCAT; 211 return 2; 212 } 213 } 214 case ',': { 215 *tokenType = TK_COMMA; 216 return 1; 217 } 218 case '&': { 219 *tokenType = TK_BITAND; 220 return 1; 221 } 222 case '~': { 223 *tokenType = TK_BITNOT; 224 return 1; 225 } 226 case '`': 227 case '\'': 228 case '"': { 229 int delim = z[0]; 230 for(i=1; (c=z[i])!=0; i++){ 231 if( c==delim ){ 232 if( z[i+1]==delim ){ 233 i++; 234 }else{ 235 break; 236 } 237 } 238 } 239 if( c ){ 240 *tokenType = TK_STRING; 241 return i+1; 242 }else{ 243 *tokenType = TK_ILLEGAL; 244 return i; 245 } 246 } 247 case '.': { 248#ifndef SQLITE_OMIT_FLOATING_POINT 249 if( !isdigit(z[1]) ) 250#endif 251 { 252 *tokenType = TK_DOT; 253 return 1; 254 } 255 /* If the next character is a digit, this is a floating point 256 ** number that begins with ".". Fall thru into the next case */ 257 } 258 case '0': case '1': case '2': case '3': case '4': 259 case '5': case '6': case '7': case '8': case '9': { 260 *tokenType = TK_INTEGER; 261 for(i=0; isdigit(z[i]); i++){} 262#ifndef SQLITE_OMIT_FLOATING_POINT 263 if( z[i]=='.' ){ 264 i++; 265 while( isdigit(z[i]) ){ i++; } 266 *tokenType = TK_FLOAT; 267 } 268 if( (z[i]=='e' || z[i]=='E') && 269 ( isdigit(z[i+1]) 270 || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) 271 ) 272 ){ 273 i += 2; 274 while( isdigit(z[i]) ){ i++; } 275 *tokenType = TK_FLOAT; 276 } 277#endif 278 while( IdChar(z[i]) ){ 279 *tokenType = TK_ILLEGAL; 280 i++; 281 } 282 return i; 283 } 284 case '[': { 285 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} 286 *tokenType = c==']' ? TK_ID : TK_ILLEGAL; 287 return i; 288 } 289 case '?': { 290 *tokenType = TK_VARIABLE; 291 for(i=1; isdigit(z[i]); i++){} 292 return i; 293 } 294 case '#': { 295 for(i=1; isdigit(z[i]); i++){} 296 if( i>1 ){ 297 /* Parameters of the form #NNN (where NNN is a number) are used 298 ** internally by sqlite3NestedParse. */ 299 *tokenType = TK_REGISTER; 300 return i; 301 } 302 /* Fall through into the next case if the '#' is not followed by 303 ** a digit. Try to match #AAAA where AAAA is a parameter name. */ 304 } 305#ifndef SQLITE_OMIT_TCL_VARIABLE 306 case '$': 307#endif 308 case '@': /* For compatibility with MS SQL Server */ 309 case ':': { 310 int n = 0; 311 *tokenType = TK_VARIABLE; 312 for(i=1; (c=z[i])!=0; i++){ 313 if( IdChar(c) ){ 314 n++; 315#ifndef SQLITE_OMIT_TCL_VARIABLE 316 }else if( c=='(' && n>0 ){ 317 do{ 318 i++; 319 }while( (c=z[i])!=0 && !isspace(c) && c!=')' ); 320 if( c==')' ){ 321 i++; 322 }else{ 323 *tokenType = TK_ILLEGAL; 324 } 325 break; 326 }else if( c==':' && z[i+1]==':' ){ 327 i++; 328#endif 329 }else{ 330 break; 331 } 332 } 333 if( n==0 ) *tokenType = TK_ILLEGAL; 334 return i; 335 } 336#ifndef SQLITE_OMIT_BLOB_LITERAL 337 case 'x': case 'X': { 338 if( z[1]=='\'' ){ 339 *tokenType = TK_BLOB; 340 for(i=2; (c=z[i])!=0 && c!='\''; i++){ 341 if( !isxdigit(c) ){ 342 *tokenType = TK_ILLEGAL; 343 } 344 } 345 if( i%2 || !c ) *tokenType = TK_ILLEGAL; 346 if( c ) i++; 347 return i; 348 } 349 /* Otherwise fall through to the next case */ 350 } 351#endif 352 default: { 353 if( !IdChar(*z) ){ 354 break; 355 } 356 for(i=1; IdChar(z[i]); i++){} 357 *tokenType = keywordCode((char*)z, i); 358 return i; 359 } 360 } 361 *tokenType = TK_ILLEGAL; 362 return 1; 363} 364 365static int 366bdb_run_parser(Parse *pParse, const char *zSql, char **pzErrMsg){ 367 int nErr = 0; 368 int i; 369 void *pEngine; 370 int tokenType; 371 int lastTokenParsed = -1; 372 pParse->rc = SQLITE_OK; 373 pParse->zTail = pParse->zSql = zSql; 374 i = 0; 375 pEngine = sqlite3ParserAlloc((void*(*)(size_t))malloc); 376 if( pEngine==0 ){ 377 return SQLITE_NOMEM; 378 } 379 380 while(zSql[i]!=0 ){ 381 assert( i>=0 ); 382 pParse->sLastToken.z = (u8*)&zSql[i]; 383 assert( pParse->sLastToken.dyn==0 ); 384 pParse->sLastToken.n = getToken((unsigned char*)&zSql[i],&tokenType); 385 i += pParse->sLastToken.n; 386 if( i>SQLITE_MAX_SQL_LENGTH ){ 387 pParse->rc = SQLITE_TOOBIG; 388 break; 389 } 390 switch( tokenType ) { 391 case TK_SPACE: { 392 break; 393 } 394 case TK_COMMENT: { 395 parse_hint_comment(&pParse->sLastToken); 396 break; 397 } 398 case TK_ILLEGAL: { 399 if( pzErrMsg ){ 400 free(*pzErrMsg); 401 *pzErrMsg = sqlite3MPrintf(0, "unrecognized token: \"%T\"", 402 &pParse->sLastToken); 403 } 404 nErr++; 405 goto abort_parse; 406 } 407 case TK_SEMI: { 408 pParse->zTail = &zSql[i]; 409 /* Fall thru into the default case */ 410 } 411 default: { 412 preparser(pEngine, tokenType, pParse->sLastToken, pParse); 413 lastTokenParsed = tokenType; 414 if( pParse->rc!=SQLITE_OK ){ 415 goto abort_parse; 416 } 417 break; 418 } 419 } 420 } 421abort_parse: 422 if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ 423 sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); 424 pParse->zTail = &zSql[i]; 425 sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); 426 } 427 sqlite3ParserFree(pEngine,free); 428 if( 0 ){ 429 pParse->rc = SQLITE_NOMEM; 430 } 431 if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ 432 setString(&pParse->zErrMsg, sqlite3ErrStr(pParse->rc), (char*)0); 433 } 434 if( pParse->zErrMsg ){ 435 if( pzErrMsg && *pzErrMsg==0 ){ 436 *pzErrMsg = pParse->zErrMsg; 437 }else{ 438 free(pParse->zErrMsg); 439 } 440 pParse->zErrMsg = 0; 441 nErr++; 442 } 443 if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){ 444 pParse->rc = SQLITE_ERROR; 445 } 446 return nErr; 447} 448 449int do_parse(const char *zSql, char **pzErrMsg) { 450 Parse sParse; 451 memset(&sParse, 0, sizeof(sParse)); 452 return bdb_run_parser(&sParse, zSql, pzErrMsg); 453} 454