1/*	$NetBSD$	*/
2
3/*
4** Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp
5** Lexical Analyzer
6** See Copyright Notice in lua.h
7*/
8
9
10#include <ctype.h>
11#include <locale.h>
12#include <string.h>
13
14#define llex_c
15#define LUA_CORE
16
17#include "lua.h"
18
19#include "ldo.h"
20#include "llex.h"
21#include "lobject.h"
22#include "lparser.h"
23#include "lstate.h"
24#include "lstring.h"
25#include "ltable.h"
26#include "lzio.h"
27
28
29
/* advance the lexer: read the next character from the input stream `ls->z'
   and make it the current character */
#define next(ls) (ls->current = zgetc(ls->z))
31
32
33
34
/* true when the current character is `\n' or `\r' */
#define currIsNewline(ls)	(ls->current == '\n' || ls->current == '\r')
36
37
/*
** Printable names of the tokens (ORDER RESERVED).
** luaX_init indexes this table by reserved-word number and
** luaX_token2str indexes it by `token - FIRST_RESERVED', so the entries
** must stay in exactly this order. The list is NULL-terminated.
*/
const char *const luaX_tokens [] = {
    /* reserved words */
    "and",
    "break",
    "do",
    "else",
    "elseif",
    "end",
    "false",
    "for",
    "function",
    "if",
    "in",
    "local",
    "nil",
    "not",
    "or",
    "repeat",
    "return",
    "then",
    "true",
    "until",
    "while",
    /* multi-character symbols */
    "..",
    "...",
    "==",
    ">=",
    "<=",
    "~=",
    /* token classes */
    "<number>",
    "<name>",
    "<string>",
    "<eof>",
    NULL
};
48
49
/* append the current character to the token buffer, then read the next one */
#define save_and_next(ls) (save(ls, ls->current), next(ls))
51
52
53static void save (LexState *ls, int c) {
54  Mbuffer *b = ls->buff;
55  if (b->n + 1 > b->buffsize) {
56    size_t newsize;
57    if (b->buffsize >= MAX_SIZET/2)
58      luaX_lexerror(ls, "lexical element too long", 0);
59    newsize = b->buffsize * 2;
60    luaZ_resizebuffer(ls->L, b, newsize);
61  }
62  b->buffer[b->n++] = cast(char, c);
63}
64
65
66void luaX_init (lua_State *L) {
67  int i;
68  for (i=0; i<NUM_RESERVED; i++) {
69    TString *ts = luaS_new(L, luaX_tokens[i]);
70    luaS_fix(ts);  /* reserved words are never collected */
71    lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN);
72    ts->tsv.reserved = cast_byte(i+1);  /* reserved word */
73  }
74}
75
76
/* size of the buffer that luaX_lexerror uses to format the chunk id */
#define MAXSRC          80
78
79
80const char *luaX_token2str (LexState *ls, int token) {
81  if (token < FIRST_RESERVED) {
82    lua_assert(token == cast(unsigned char, token));
83    return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) :
84                              luaO_pushfstring(ls->L, "%c", token);
85  }
86  else
87    return luaX_tokens[token-FIRST_RESERVED];
88}
89
90
91static const char *txtToken (LexState *ls, int token) {
92  switch (token) {
93    case TK_NAME:
94    case TK_STRING:
95    case TK_NUMBER:
96      save(ls, '\0');
97      return luaZ_buffer(ls->buff);
98    default:
99      return luaX_token2str(ls, token);
100  }
101}
102
103
104void luaX_lexerror (LexState *ls, const char *msg, int token) {
105  char buff[MAXSRC];
106  luaO_chunkid(buff, getstr(ls->source), MAXSRC);
107  msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
108  if (token)
109    luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));
110  luaD_throw(ls->L, LUA_ERRSYNTAX);
111}
112
113
114void luaX_syntaxerror (LexState *ls, const char *msg) {
115  luaX_lexerror(ls, msg, ls->t.token);
116}
117
118
119TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
120  lua_State *L = ls->L;
121  TString *ts = luaS_newlstr(L, str, l);
122  TValue *o = luaH_setstr(L, ls->fs->h, ts);  /* entry for `str' */
123  if (ttisnil(o))
124    setbvalue(o, 1);  /* make sure `str' will not be collected */
125  return ts;
126}
127
128
129static void inclinenumber (LexState *ls) {
130  int old = ls->current;
131  lua_assert(currIsNewline(ls));
132  next(ls);  /* skip `\n' or `\r' */
133  if (currIsNewline(ls) && ls->current != old)
134    next(ls);  /* skip `\n\r' or `\r\n' */
135  if (++ls->linenumber >= MAX_INT)
136    luaX_syntaxerror(ls, "chunk has too many lines");
137}
138
139
140void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
141  ls->decpoint = '.';
142  ls->L = L;
143  ls->lookahead.token = TK_EOS;  /* no look-ahead token */
144  ls->z = z;
145  ls->fs = NULL;
146  ls->linenumber = 1;
147  ls->lastline = 1;
148  ls->source = source;
149  luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
150  next(ls);  /* read first char */
151}
152
153
154
155/*
156** =======================================================
157** LEXICAL ANALYZER
158** =======================================================
159*/
160
161
162
163static int check_next (LexState *ls, const char *set) {
164  if (!strchr(set, ls->current))
165    return 0;
166  save_and_next(ls);
167  return 1;
168}
169
170
171static void buffreplace (LexState *ls, char from, char to) {
172  size_t n = luaZ_bufflen(ls->buff);
173  char *p = luaZ_buffer(ls->buff);
174  while (n--)
175    if (p[n] == from) p[n] = to;
176}
177
178
179static void trydecpoint (LexState *ls, SemInfo *seminfo) {
180  /* format error: try to update decimal point separator */
181  struct lconv *cv = localeconv();
182  char old = ls->decpoint;
183  ls->decpoint = (cv ? cv->decimal_point[0] : '.');
184  buffreplace(ls, old, ls->decpoint);  /* try updated decimal separator */
185  if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) {
186    /* format error with correct decimal point: no more options */
187    buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
188    luaX_lexerror(ls, "malformed number", TK_NUMBER);
189  }
190}
191
192
193/* LUA_NUMBER */
194static void read_numeral (LexState *ls, SemInfo *seminfo) {
195  lua_assert(isdigit(ls->current));
196  do {
197    save_and_next(ls);
198  } while (isdigit(ls->current) || ls->current == '.');
199  if (check_next(ls, "Ee"))  /* `E'? */
200    check_next(ls, "+-");  /* optional exponent sign */
201  while (isalnum(ls->current) || ls->current == '_')
202    save_and_next(ls);
203  save(ls, '\0');
204  buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
205  if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))  /* format error? */
206    trydecpoint(ls, seminfo); /* try to update decimal point separator */
207}
208
209
210static int skip_sep (LexState *ls) {
211  int count = 0;
212  int s = ls->current;
213  lua_assert(s == '[' || s == ']');
214  save_and_next(ls);
215  while (ls->current == '=') {
216    save_and_next(ls);
217    count++;
218  }
219  return (ls->current == s) ? count : (-count) - 1;
220}
221
222
/*
** Reads the body of a long-bracket string or comment whose delimiters
** have `sep' equal signs. On entry the current character is the second
** `[' of the opening delimiter.
** When `seminfo' is NULL the text is a comment: ordinary characters are
** not saved and no result is produced. Otherwise the contents, without
** the opening and closing delimiters, are stored in `seminfo->ts'.
** Raises a lexical error if the input ends before the closing delimiter.
*/
static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
  int cont = 0;  /* count of nested `[[' seen (compatibility modes only) */
  (void)(cont);  /* avoid warnings when `cont' is not used */
  save_and_next(ls);  /* skip 2nd `[' */
  if (currIsNewline(ls))  /* string starts with a newline? */
    inclinenumber(ls);  /* skip it */
  for (;;) {
    switch (ls->current) {
      case EOZ:
        /* input ended inside the string/comment */
        luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
                                   "unfinished long comment", TK_EOS);
        break;  /* to avoid warnings */
#if defined(LUA_COMPAT_LSTR)
      case '[': {
        /* an opening delimiter of the same level inside the text */
        if (skip_sep(ls) == sep) {
          save_and_next(ls);  /* skip 2nd `[' */
          cont++;
#if LUA_COMPAT_LSTR == 1
          if (sep == 0)
            luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
#endif
        }
        break;
      }
#endif
      case ']': {
        /* only a closing delimiter with the same number of `=' ends it */
        if (skip_sep(ls) == sep) {
          save_and_next(ls);  /* skip 2nd `]' */
#if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
          cont--;
          /* level-0 text: this `]]' closed a nested `[[', keep reading */
          if (sep == 0 && cont >= 0) break;
#endif
          goto endloop;
        }
        break;
      }
      case '\n':
      case '\r': {
        /* whatever the line-break sequence, a single `\n' is stored */
        save(ls, '\n');
        inclinenumber(ls);
        if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
        break;
      }
      default: {
        /* keep the character for strings, just skip it for comments */
        if (seminfo) save_and_next(ls);
        else next(ls);
      }
    }
  } endloop:
  /* the buffer holds both delimiters (2 brackets + `sep' equal signs
     each); leave them out of the resulting string */
  if (seminfo)
    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
                                     luaZ_bufflen(ls->buff) - 2*(2 + sep));
}
276
277
/*
** Reads a quoted string delimited by character `del'. On entry the
** current character is the opening delimiter. Escape sequences are
** translated while reading. The contents, without the two delimiters,
** are stored in `seminfo->ts'.
** Raises a lexical error if the input ends or a line break appears
** before the closing delimiter, or if a decimal escape is larger than
** UCHAR_MAX.
*/
static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  save_and_next(ls);  /* keep the opening delimiter in the buffer */
  while (ls->current != del) {
    switch (ls->current) {
      case EOZ:
        luaX_lexerror(ls, "unfinished string", TK_EOS);
        continue;  /* to avoid warnings */
      case '\n':
      case '\r':
        /* an unescaped line break is not allowed inside the string */
        luaX_lexerror(ls, "unfinished string", TK_STRING);
        continue;  /* to avoid warnings */
      case '\\': {
        int c;  /* character that the escape sequence stands for */
        next(ls);  /* do not save the `\' */
        switch (ls->current) {
          case 'a': c = '\a'; break;
          case 'b': c = '\b'; break;
          case 'f': c = '\f'; break;
          case 'n': c = '\n'; break;
          case 'r': c = '\r'; break;
          case 't': c = '\t'; break;
          case 'v': c = '\v'; break;
          /* backslash followed by a line break: store a `\n' */
          case '\n':  /* go through */
          case '\r': save(ls, '\n'); inclinenumber(ls); continue;
          case EOZ: continue;  /* will raise an error next loop */
          default: {
            if (!isdigit(ls->current))
              save_and_next(ls);  /* handles \\, \", \', and \? */
            else {  /* \xxx */
              /* decimal escape: one to three digits */
              int i = 0;
              c = 0;
              do {
                c = 10*c + (ls->current-'0');
                next(ls);
              } while (++i<3 && isdigit(ls->current));
              if (c > UCHAR_MAX)
                luaX_lexerror(ls, "escape sequence too large", TK_STRING);
              save(ls, c);
            }
            continue;
          }
        }
        /* single-letter escape: store its value and skip the letter */
        save(ls, c);
        next(ls);
        continue;
      }
      default:
        save_and_next(ls);
    }
  }
  save_and_next(ls);  /* skip delimiter */
  /* leave the opening and closing delimiters out of the result */
  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
                                   luaZ_bufflen(ls->buff) - 2);
}
332
333
/*
** Scans the input and returns the next token. Spaces, line breaks and
** comments are skipped. For TK_NAME and TK_STRING the string is stored
** in `seminfo->ts'; for TK_NUMBER, read_numeral stores the value in
** `seminfo->r'. Single-character tokens are returned as their own
** character code, and TK_EOS is returned at the end of the input.
*/
static int llex (LexState *ls, SemInfo *seminfo) {
  luaZ_resetbuffer(ls->buff);  /* start with a clean token buffer */
  for (;;) {
    switch (ls->current) {
      case '\n':
      case '\r': {
        /* a line break is not a token: count it and keep scanning */
        inclinenumber(ls);
        continue;
      }
      case '-': {
        next(ls);
        if (ls->current != '-') return '-';  /* a single `-' is a token */
        /* else is a comment */
        next(ls);
        if (ls->current == '[') {
          int sep = skip_sep(ls);
          luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
          if (sep >= 0) {
            read_long_string(ls, NULL, sep);  /* long comment */
            luaZ_resetbuffer(ls->buff);
            continue;
          }
        }
        /* else short comment */
        while (!currIsNewline(ls) && ls->current != EOZ)
          next(ls);
        continue;
      }
      case '[': {
        int sep = skip_sep(ls);
        if (sep >= 0) {
          read_long_string(ls, seminfo, sep);
          return TK_STRING;
        }
        /* -1: no `=' and no second `[' followed, so it is a plain `[' */
        else if (sep == -1) return '[';
        /* NOTE(review): no return or break follows this call; the code
           appears to rely on luaX_lexerror never returning (it ends in
           luaD_throw) -- confirm */
        else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
      }
      case '=': {
        next(ls);
        if (ls->current != '=') return '=';
        else { next(ls); return TK_EQ; }
      }
      case '<': {
        next(ls);
        if (ls->current != '=') return '<';
        else { next(ls); return TK_LE; }
      }
      case '>': {
        next(ls);
        if (ls->current != '=') return '>';
        else { next(ls); return TK_GE; }
      }
      case '~': {
        next(ls);
        if (ls->current != '=') return '~';
        else { next(ls); return TK_NE; }
      }
      case '"':
      case '\'': {
        /* the opening quote is also the closing delimiter to look for */
        read_string(ls, ls->current, seminfo);
        return TK_STRING;
      }
      case '.': {
        save_and_next(ls);
        if (check_next(ls, ".")) {
          if (check_next(ls, "."))
            return TK_DOTS;   /* ... */
          else return TK_CONCAT;   /* .. */
        }
        else if (!isdigit(ls->current)) return '.';
        else {
          /* a dot followed by a digit starts a number; the dot is
             already in the buffer */
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
      }
      case EOZ: {
        return TK_EOS;
      }
      default: {
        if (isspace(ls->current)) {
          /* line breaks were handled above; skip other white space */
          lua_assert(!currIsNewline(ls));
          next(ls);
          continue;
        }
        else if (isdigit(ls->current)) {
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
        else if (isalpha(ls->current) || ls->current == '_') {
          /* identifier or reserved word */
          TString *ts;
          do {
            save_and_next(ls);
          } while (isalnum(ls->current) || ls->current == '_');
          ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                  luaZ_bufflen(ls->buff));
          /* luaX_init stores (index + 1) in `reserved' for reserved
             words, so this maps the string back to its token code */
          if (ts->tsv.reserved > 0)  /* reserved word? */
            return ts->tsv.reserved - 1 + FIRST_RESERVED;
          else {
            seminfo->ts = ts;
            return TK_NAME;
          }
        }
        else {
          int c = ls->current;
          next(ls);
          return c;  /* single-char tokens (+ - / ...) */
        }
      }
    }
  }
}
446
447
448void luaX_next (LexState *ls) {
449  ls->lastline = ls->linenumber;
450  if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
451    ls->t = ls->lookahead;  /* use this one */
452    ls->lookahead.token = TK_EOS;  /* and discharge it */
453  }
454  else
455    ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
456}
457
458
459void luaX_lookahead (LexState *ls) {
460  lua_assert(ls->lookahead.token == TK_EOS);
461  ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
462}
463
464