1/* $NetBSD$ */ 2 3/* 4** Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp 5** Lexical Analyzer 6** See Copyright Notice in lua.h 7*/ 8 9 10#include <ctype.h> 11#include <locale.h> 12#include <string.h> 13 14#define llex_c 15#define LUA_CORE 16 17#include "lua.h" 18 19#include "ldo.h" 20#include "llex.h" 21#include "lobject.h" 22#include "lparser.h" 23#include "lstate.h" 24#include "lstring.h" 25#include "ltable.h" 26#include "lzio.h" 27 28 29 30#define next(ls) (ls->current = zgetc(ls->z)) 31 32 33 34 35#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') 36 37 38/* ORDER RESERVED */ 39const char *const luaX_tokens [] = { 40 "and", "break", "do", "else", "elseif", 41 "end", "false", "for", "function", "if", 42 "in", "local", "nil", "not", "or", "repeat", 43 "return", "then", "true", "until", "while", 44 "..", "...", "==", ">=", "<=", "~=", 45 "<number>", "<name>", "<string>", "<eof>", 46 NULL 47}; 48 49 50#define save_and_next(ls) (save(ls, ls->current), next(ls)) 51 52 53static void save (LexState *ls, int c) { 54 Mbuffer *b = ls->buff; 55 if (b->n + 1 > b->buffsize) { 56 size_t newsize; 57 if (b->buffsize >= MAX_SIZET/2) 58 luaX_lexerror(ls, "lexical element too long", 0); 59 newsize = b->buffsize * 2; 60 luaZ_resizebuffer(ls->L, b, newsize); 61 } 62 b->buffer[b->n++] = cast(char, c); 63} 64 65 66void luaX_init (lua_State *L) { 67 int i; 68 for (i=0; i<NUM_RESERVED; i++) { 69 TString *ts = luaS_new(L, luaX_tokens[i]); 70 luaS_fix(ts); /* reserved words are never collected */ 71 lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN); 72 ts->tsv.reserved = cast_byte(i+1); /* reserved word */ 73 } 74} 75 76 77#define MAXSRC 80 78 79 80const char *luaX_token2str (LexState *ls, int token) { 81 if (token < FIRST_RESERVED) { 82 lua_assert(token == cast(unsigned char, token)); 83 return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) : 84 luaO_pushfstring(ls->L, "%c", token); 85 } 86 else 87 return luaX_tokens[token-FIRST_RESERVED]; 88} 89 90 91static const char *txtToken (LexState *ls, int token) { 92 switch (token) { 93 case TK_NAME: 94 case TK_STRING: 95 case TK_NUMBER: 96 save(ls, '\0'); 97 return luaZ_buffer(ls->buff); 98 default: 99 return luaX_token2str(ls, token); 100 } 101} 102 103 104void luaX_lexerror (LexState *ls, const char *msg, int token) { 105 char buff[MAXSRC]; 106 luaO_chunkid(buff, getstr(ls->source), MAXSRC); 107 msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg); 108 if (token) 109 luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token)); 110 luaD_throw(ls->L, LUA_ERRSYNTAX); 111} 112 113 114void luaX_syntaxerror (LexState *ls, const char *msg) { 115 luaX_lexerror(ls, msg, ls->t.token); 116} 117 118 119TString *luaX_newstring (LexState *ls, const char *str, size_t l) { 120 lua_State *L = ls->L; 121 TString *ts = luaS_newlstr(L, str, l); 122 TValue *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */ 123 if (ttisnil(o)) 124 setbvalue(o, 1); /* make sure `str' will not be collected */ 125 return ts; 126} 127 128 129static void inclinenumber (LexState *ls) { 130 int old = ls->current; 131 lua_assert(currIsNewline(ls)); 132 next(ls); /* skip `\n' or `\r' */ 133 if (currIsNewline(ls) && ls->current != old) 134 next(ls); /* skip `\n\r' or `\r\n' */ 135 if (++ls->linenumber >= MAX_INT) 136 luaX_syntaxerror(ls, "chunk has too many lines"); 137} 138 139 140void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) { 141 ls->decpoint = '.'; 142 ls->L = L; 143 ls->lookahead.token = TK_EOS; /* no look-ahead token */ 144 ls->z = z; 145 ls->fs = NULL; 146 ls->linenumber = 1; 147 ls->lastline = 1; 148 ls->source = source; 149 luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ 150 next(ls); /* read first char */ 151} 152 153 154 155/* 156** ======================================================= 157** LEXICAL ANALYZER 158** ======================================================= 159*/ 160 161 162 163static int check_next (LexState *ls, const char *set) { 164 if (!strchr(set, ls->current)) 165 return 0; 166 save_and_next(ls); 167 return 1; 168} 169 170 171static void buffreplace (LexState *ls, char from, char to) { 172 size_t n = luaZ_bufflen(ls->buff); 173 char *p = luaZ_buffer(ls->buff); 174 while (n--) 175 if (p[n] == from) p[n] = to; 176} 177 178 179static void trydecpoint (LexState *ls, SemInfo *seminfo) { 180 /* format error: try to update decimal point separator */ 181 struct lconv *cv = localeconv(); 182 char old = ls->decpoint; 183 ls->decpoint = (cv ? cv->decimal_point[0] : '.'); 184 buffreplace(ls, old, ls->decpoint); /* try updated decimal separator */ 185 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) { 186 /* format error with correct decimal point: no more options */ 187 buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ 188 luaX_lexerror(ls, "malformed number", TK_NUMBER); 189 } 190} 191 192 193/* LUA_NUMBER */ 194static void read_numeral (LexState *ls, SemInfo *seminfo) { 195 lua_assert(isdigit(ls->current)); 196 do { 197 save_and_next(ls); 198 } while (isdigit(ls->current) || ls->current == '.'); 199 if (check_next(ls, "Ee")) /* `E'? */ 200 check_next(ls, "+-"); /* optional exponent sign */ 201 while (isalnum(ls->current) || ls->current == '_') 202 save_and_next(ls); 203 save(ls, '\0'); 204 buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */ 205 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) /* format error? */ 206 trydecpoint(ls, seminfo); /* try to update decimal point separator */ 207} 208 209 210static int skip_sep (LexState *ls) { 211 int count = 0; 212 int s = ls->current; 213 lua_assert(s == '[' || s == ']'); 214 save_and_next(ls); 215 while (ls->current == '=') { 216 save_and_next(ls); 217 count++; 218 } 219 return (ls->current == s) ? count : (-count) - 1; 220} 221 222 223static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { 224 int cont = 0; 225 (void)(cont); /* avoid warnings when `cont' is not used */ 226 save_and_next(ls); /* skip 2nd `[' */ 227 if (currIsNewline(ls)) /* string starts with a newline? */ 228 inclinenumber(ls); /* skip it */ 229 for (;;) { 230 switch (ls->current) { 231 case EOZ: 232 luaX_lexerror(ls, (seminfo) ? "unfinished long string" : 233 "unfinished long comment", TK_EOS); 234 break; /* to avoid warnings */ 235#if defined(LUA_COMPAT_LSTR) 236 case '[': { 237 if (skip_sep(ls) == sep) { 238 save_and_next(ls); /* skip 2nd `[' */ 239 cont++; 240#if LUA_COMPAT_LSTR == 1 241 if (sep == 0) 242 luaX_lexerror(ls, "nesting of [[...]] is deprecated", '['); 243#endif 244 } 245 break; 246 } 247#endif 248 case ']': { 249 if (skip_sep(ls) == sep) { 250 save_and_next(ls); /* skip 2nd `]' */ 251#if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2 252 cont--; 253 if (sep == 0 && cont >= 0) break; 254#endif 255 goto endloop; 256 } 257 break; 258 } 259 case '\n': 260 case '\r': { 261 save(ls, '\n'); 262 inclinenumber(ls); 263 if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ 264 break; 265 } 266 default: { 267 if (seminfo) save_and_next(ls); 268 else next(ls); 269 } 270 } 271 } endloop: 272 if (seminfo) 273 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), 274 luaZ_bufflen(ls->buff) - 2*(2 + sep)); 275} 276 277 278static void read_string (LexState *ls, int del, SemInfo *seminfo) { 279 save_and_next(ls); 280 while (ls->current != del) { 281 switch (ls->current) { 282 case EOZ: 283 luaX_lexerror(ls, "unfinished string", TK_EOS); 284 continue; /* to avoid warnings */ 285 case '\n': 286 case '\r': 287 luaX_lexerror(ls, "unfinished string", TK_STRING); 288 continue; /* to avoid warnings */ 289 case '\\': { 290 int c; 291 next(ls); /* do not save the `\' */ 292 switch (ls->current) { 293 case 'a': c = '\a'; break; 294 case 'b': c = '\b'; break; 295 case 'f': c = '\f'; break; 296 case 'n': c = '\n'; break; 297 case 'r': c = '\r'; break; 298 case 't': c = '\t'; break; 299 case 'v': c = '\v'; break; 300 case '\n': /* go through */ 301 case '\r': save(ls, '\n'); inclinenumber(ls); continue; 302 case EOZ: continue; /* will raise an error next loop */ 303 default: { 304 if (!isdigit(ls->current)) 305 save_and_next(ls); /* handles \\, \", \', and \? */ 306 else { /* \xxx */ 307 int i = 0; 308 c = 0; 309 do { 310 c = 10*c + (ls->current-'0'); 311 next(ls); 312 } while (++i<3 && isdigit(ls->current)); 313 if (c > UCHAR_MAX) 314 luaX_lexerror(ls, "escape sequence too large", TK_STRING); 315 save(ls, c); 316 } 317 continue; 318 } 319 } 320 save(ls, c); 321 next(ls); 322 continue; 323 } 324 default: 325 save_and_next(ls); 326 } 327 } 328 save_and_next(ls); /* skip delimiter */ 329 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, 330 luaZ_bufflen(ls->buff) - 2); 331} 332 333 334static int llex (LexState *ls, SemInfo *seminfo) { 335 luaZ_resetbuffer(ls->buff); 336 for (;;) { 337 switch (ls->current) { 338 case '\n': 339 case '\r': { 340 inclinenumber(ls); 341 continue; 342 } 343 case '-': { 344 next(ls); 345 if (ls->current != '-') return '-'; 346 /* else is a comment */ 347 next(ls); 348 if (ls->current == '[') { 349 int sep = skip_sep(ls); 350 luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ 351 if (sep >= 0) { 352 read_long_string(ls, NULL, sep); /* long comment */ 353 luaZ_resetbuffer(ls->buff); 354 continue; 355 } 356 } 357 /* else short comment */ 358 while (!currIsNewline(ls) && ls->current != EOZ) 359 next(ls); 360 continue; 361 } 362 case '[': { 363 int sep = skip_sep(ls); 364 if (sep >= 0) { 365 read_long_string(ls, seminfo, sep); 366 return TK_STRING; 367 } 368 else if (sep == -1) return '['; 369 else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING); 370 } 371 case '=': { 372 next(ls); 373 if (ls->current != '=') return '='; 374 else { next(ls); return TK_EQ; } 375 } 376 case '<': { 377 next(ls); 378 if (ls->current != '=') return '<'; 379 else { next(ls); return TK_LE; } 380 } 381 case '>': { 382 next(ls); 383 if (ls->current != '=') return '>'; 384 else { next(ls); return TK_GE; } 385 } 386 case '~': { 387 next(ls); 388 if (ls->current != '=') return '~'; 389 else { next(ls); return TK_NE; } 390 } 391 case '"': 392 case '\'': { 393 read_string(ls, ls->current, seminfo); 394 return TK_STRING; 395 } 396 case '.': { 397 save_and_next(ls); 398 if (check_next(ls, ".")) { 399 if (check_next(ls, ".")) 400 return TK_DOTS; /* ... */ 401 else return TK_CONCAT; /* .. */ 402 } 403 else if (!isdigit(ls->current)) return '.'; 404 else { 405 read_numeral(ls, seminfo); 406 return TK_NUMBER; 407 } 408 } 409 case EOZ: { 410 return TK_EOS; 411 } 412 default: { 413 if (isspace(ls->current)) { 414 lua_assert(!currIsNewline(ls)); 415 next(ls); 416 continue; 417 } 418 else if (isdigit(ls->current)) { 419 read_numeral(ls, seminfo); 420 return TK_NUMBER; 421 } 422 else if (isalpha(ls->current) || ls->current == '_') { 423 /* identifier or reserved word */ 424 TString *ts; 425 do { 426 save_and_next(ls); 427 } while (isalnum(ls->current) || ls->current == '_'); 428 ts = luaX_newstring(ls, luaZ_buffer(ls->buff), 429 luaZ_bufflen(ls->buff)); 430 if (ts->tsv.reserved > 0) /* reserved word? */ 431 return ts->tsv.reserved - 1 + FIRST_RESERVED; 432 else { 433 seminfo->ts = ts; 434 return TK_NAME; 435 } 436 } 437 else { 438 int c = ls->current; 439 next(ls); 440 return c; /* single-char tokens (+ - / ...) */ 441 } 442 } 443 } 444 } 445} 446 447 448void luaX_next (LexState *ls) { 449 ls->lastline = ls->linenumber; 450 if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */ 451 ls->t = ls->lookahead; /* use this one */ 452 ls->lookahead.token = TK_EOS; /* and discharge it */ 453 } 454 else 455 ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ 456} 457 458 459void luaX_lookahead (LexState *ls) { 460 lua_assert(ls->lookahead.token == TK_EOS); 461 ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); 462} 463 464