root/lib/lua/llex.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. save
  2. luaX_init
  3. luaX_token2str
  4. txtToken
  5. luaX_lexerror
  6. luaX_syntaxerror
  7. luaX_newstring
  8. inclinenumber
  9. luaX_setinput
  10. check_next
  11. buffreplace
  12. trydecpoint
  13. read_numeral
  14. skip_sep
  15. read_long_string
  16. read_string
  17. llex
  18. luaX_next
  19. luaX_lookahead

   1 /*
   2 ** $Id: llex.c,v 2.20.1.2 2009/11/23 14:58:22 roberto Exp $
   3 ** Lexical Analyzer
   4 ** See Copyright Notice in lua.h
   5 */
   6 
   7 
   8 #include <ctype.h>
   9 #include <locale.h>
  10 #include <string.h>
  11 
  12 #define llex_c
  13 #define LUA_CORE
  14 
  15 #include "lua.h"
  16 
  17 #include "ldo.h"
  18 #include "llex.h"
  19 #include "lobject.h"
  20 #include "lparser.h"
  21 #include "lstate.h"
  22 #include "lstring.h"
  23 #include "ltable.h"
  24 #include "lzio.h"
  25 
  26 
  27 
  28 #define next(ls) (ls->current = zgetc(ls->z))
  29 
  30 
  31 
  32 
  33 #define currIsNewline(ls)       (ls->current == '\n' || ls->current == '\r')
  34 
  35 
  36 /* ORDER RESERVED */
  37 const char *const luaX_tokens [] = {
  38     "and", "break", "do", "else", "elseif",
  39     "end", "false", "for", "function", "if",
  40     "in", "local", "nil", "not", "or", "repeat",
  41     "return", "then", "true", "until", "while",
  42     "..", "...", "==", ">=", "<=", "~=",
  43     "<number>", "<name>", "<string>", "<eof>",
  44     NULL
  45 };
  46 
  47 
  48 #define save_and_next(ls) (save(ls, ls->current), next(ls))
  49 
  50 
  51 static void save (LexState *ls, int c) {
  52   Mbuffer *b = ls->buff;
  53   if (b->n + 1 > b->buffsize) {
  54     size_t newsize;
  55     if (b->buffsize >= MAX_SIZET/2)
  56       luaX_lexerror(ls, "lexical element too long", 0);
  57     newsize = b->buffsize * 2;
  58     luaZ_resizebuffer(ls->L, b, newsize);
  59   }
  60   b->buffer[b->n++] = cast(char, c);
  61 }
  62 
  63 
  64 LUAI_FUNC void luaX_init (lua_State *L) {
  65   int i;
  66   for (i=0; i<NUM_RESERVED; i++) {
  67     TString *ts = luaS_new(L, luaX_tokens[i]);
  68     luaS_fix(ts);  /* reserved words are never collected */
  69     lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN);
  70     ts->tsv.reserved = cast_byte(i+1);  /* reserved word */
  71   }
  72 }
  73 
  74 
  75 #define MAXSRC          80
  76 
  77 
  78 LUAI_FUNC const char *luaX_token2str (LexState *ls, int token) {
  79   if (token < FIRST_RESERVED) {
  80     lua_assert(token == cast(unsigned char, token));
  81     return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) :
  82                               luaO_pushfstring(ls->L, "%c", token);
  83   }
  84   else
  85     return luaX_tokens[token-FIRST_RESERVED];
  86 }
  87 
  88 
  89 static const char *txtToken (LexState *ls, int token) {
  90   switch (token) {
  91     case TK_NAME:
  92     case TK_STRING:
  93     case TK_NUMBER:
  94       save(ls, '\0');
  95       return luaZ_buffer(ls->buff);
  96     default:
  97       return luaX_token2str(ls, token);
  98   }
  99 }
 100 
 101 
 102 LUAI_FUNC void luaX_lexerror (LexState *ls, const char *msg, int token) {
 103   char buff[MAXSRC];
 104   luaO_chunkid(buff, getstr(ls->source), MAXSRC);
 105   msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
 106   if (token)
 107     luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));
 108   luaD_throw(ls->L, LUA_ERRSYNTAX);
 109 }
 110 
 111 
 112 LUAI_FUNC void luaX_syntaxerror (LexState *ls, const char *msg) {
 113   luaX_lexerror(ls, msg, ls->t.token);
 114 }
 115 
 116 
 117 LUAI_FUNC TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
 118   lua_State *L = ls->L;
 119   TString *ts = luaS_newlstr(L, str, l);
 120   TValue *o = luaH_setstr(L, ls->fs->h, ts);  /* entry for `str' */
 121   if (ttisnil(o)) {
 122     setbvalue(o, 1);  /* make sure `str' will not be collected */
 123     luaC_checkGC(L);
 124   }
 125   return ts;
 126 }
 127 
 128 
 129 static void inclinenumber (LexState *ls) {
 130   int old = ls->current;
 131   lua_assert(currIsNewline(ls));
 132   next(ls);  /* skip `\n' or `\r' */
 133   if (currIsNewline(ls) && ls->current != old)
 134     next(ls);  /* skip `\n\r' or `\r\n' */
 135   if (++ls->linenumber >= MAX_INT)
 136     luaX_syntaxerror(ls, "chunk has too many lines");
 137 }
 138 
 139 
 140 LUAI_FUNC void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
 141   ls->decpoint = '.';
 142   ls->L = L;
 143   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
 144   ls->z = z;
 145   ls->fs = NULL;
 146   ls->linenumber = 1;
 147   ls->lastline = 1;
 148   ls->source = source;
 149   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
 150   next(ls);  /* read first char */
 151 }
 152 
 153 
 154 
 155 /*
 156 ** =======================================================
 157 ** LEXICAL ANALYZER
 158 ** =======================================================
 159 */
 160 
 161 
 162 
 163 static int check_next (LexState *ls, const char *set) {
 164   if (!strchr(set, ls->current))
 165     return 0;
 166   save_and_next(ls);
 167   return 1;
 168 }
 169 
 170 
 171 static void buffreplace (LexState *ls, char from, char to) {
 172   size_t n = luaZ_bufflen(ls->buff);
 173   char *p = luaZ_buffer(ls->buff);
 174   while (n--)
 175     if (p[n] == from) p[n] = to;
 176 }
 177 
 178 
 179 static void trydecpoint (LexState *ls, __attribute__ ((unused))SemInfo *seminfo) {
 180 #if 0
 181   /* format error: try to update decimal point separator */
 182   struct lconv *cv = localeconv();
 183   char old = ls->decpoint;
 184   ls->decpoint = (cv ? cv->decimal_point[0] : '.');
 185   buffreplace(ls, old, ls->decpoint);  /* try updated decimal separator */
 186   if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) {
 187     /* format error with correct decimal point: no more options */
 188     buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
 189 #endif
 190     luaX_lexerror(ls, "malformed number", TK_NUMBER);
 191 #if 0
 192   }
 193 #endif
 194 }
 195 
 196 
 197 /* LUA_NUMBER */
 198 static void read_numeral (LexState *ls, SemInfo *seminfo) {
 199   lua_assert(isdigit(ls->current));
 200   do {
 201     save_and_next(ls);
 202   } while (isdigit(ls->current) || ls->current == '.');
 203   if (check_next(ls, "Ee"))  /* `E'? */
 204     check_next(ls, "+-");  /* optional exponent sign */
 205   while (isalnum(ls->current) || ls->current == '_')
 206     save_and_next(ls);
 207   save(ls, '\0');
 208   buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
 209   if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))  /* format error? */
 210     trydecpoint(ls, seminfo); /* try to update decimal point separator */
 211 }
 212 
 213 
 214 static int skip_sep (LexState *ls) {
 215   int count = 0;
 216   int s = ls->current;
 217   lua_assert(s == '[' || s == ']');
 218   save_and_next(ls);
 219   while (ls->current == '=') {
 220     save_and_next(ls);
 221     count++;
 222   }
 223   return (ls->current == s) ? count : (-count) - 1;
 224 }
 225 
 226 
 227 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
 228   int cont = 0;
 229   (void)(cont);  /* avoid warnings when `cont' is not used */
 230   save_and_next(ls);  /* skip 2nd `[' */
 231   if (currIsNewline(ls))  /* string starts with a newline? */
 232     inclinenumber(ls);  /* skip it */
 233   for (;;) {
 234     switch (ls->current) {
 235       case EOZ:
 236         luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
 237                                    "unfinished long comment", TK_EOS);
 238         break;  /* to avoid warnings */
 239 #if defined(LUA_COMPAT_LSTR)
 240       case '[': {
 241         if (skip_sep(ls) == sep) {
 242           save_and_next(ls);  /* skip 2nd `[' */
 243           cont++;
 244 #if LUA_COMPAT_LSTR == 1
 245           if (sep == 0)
 246             luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
 247 #endif
 248         }
 249         break;
 250       }
 251 #endif
 252       case ']': {
 253         if (skip_sep(ls) == sep) {
 254           save_and_next(ls);  /* skip 2nd `]' */
 255 #if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
 256           cont--;
 257           if (sep == 0 && cont >= 0) break;
 258 #endif
 259           goto endloop;
 260         }
 261         break;
 262       }
 263       case '\n':
 264       case '\r': {
 265         save(ls, '\n');
 266         inclinenumber(ls);
 267         if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
 268         break;
 269       }
 270       default: {
 271         if (seminfo) save_and_next(ls);
 272         else next(ls);
 273       }
 274     }
 275   } endloop:
 276   if (seminfo)
 277     seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
 278                                      luaZ_bufflen(ls->buff) - 2*(2 + sep));
 279 }
 280 
 281 
 282 static void read_string (LexState *ls, int del, SemInfo *seminfo) {
 283   save_and_next(ls);
 284   while (ls->current != del) {
 285     switch (ls->current) {
 286       case EOZ:
 287         luaX_lexerror(ls, "unfinished string", TK_EOS);
 288         continue;  /* to avoid warnings */
 289       case '\n':
 290       case '\r':
 291         luaX_lexerror(ls, "unfinished string", TK_STRING);
 292         continue;  /* to avoid warnings */
 293       case '\\': {
 294         int c;
 295         next(ls);  /* do not save the `\' */
 296         switch (ls->current) {
 297           case 'a': c = '\a'; break;
 298           case 'b': c = '\b'; break;
 299           case 'f': c = '\f'; break;
 300           case 'n': c = '\n'; break;
 301           case 'r': c = '\r'; break;
 302           case 't': c = '\t'; break;
 303           case 'v': c = '\v'; break;
 304           case '\n':  /* go through */
 305           case '\r': save(ls, '\n'); inclinenumber(ls); continue;
 306           case EOZ: continue;  /* will raise an error next loop */
 307           default: {
 308             if (!isdigit(ls->current))
 309               save_and_next(ls);  /* handles \\, \", \', and \? */
 310             else {  /* \xxx */
 311               int i = 0;
 312               c = 0;
 313               do {
 314                 c = 10*c + (ls->current-'0');
 315                 next(ls);
 316               } while (++i<3 && isdigit(ls->current));
 317               if (c > UCHAR_MAX)
 318                 luaX_lexerror(ls, "escape sequence too large", TK_STRING);
 319               save(ls, c);
 320             }
 321             continue;
 322           }
 323         }
 324         save(ls, c);
 325         next(ls);
 326         continue;
 327       }
 328       default:
 329         save_and_next(ls);
 330     }
 331   }
 332   save_and_next(ls);  /* skip delimiter */
 333   seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
 334                                    luaZ_bufflen(ls->buff) - 2);
 335 }
 336 
 337 
 338 static int llex (LexState *ls, SemInfo *seminfo) {
 339   luaZ_resetbuffer(ls->buff);
 340   for (;;) {
 341     switch (ls->current) {
 342       case '\n':
 343       case '\r': {
 344         inclinenumber(ls);
 345         continue;
 346       }
 347       case '-': {
 348         next(ls);
 349         if (ls->current != '-') return '-';
 350         /* else is a comment */
 351         next(ls);
 352         if (ls->current == '[') {
 353           int sep = skip_sep(ls);
 354           luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
 355           if (sep >= 0) {
 356             read_long_string(ls, NULL, sep);  /* long comment */
 357             luaZ_resetbuffer(ls->buff);
 358             continue;
 359           }
 360         }
 361         /* else short comment */
 362         while (!currIsNewline(ls) && ls->current != EOZ)
 363           next(ls);
 364         continue;
 365       }
 366       case '[': {
 367         int sep = skip_sep(ls);
 368         if (sep >= 0) {
 369           read_long_string(ls, seminfo, sep);
 370           return TK_STRING;
 371         }
 372         else if (sep == -1) return '[';
 373         else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
 374       }
 375       case '=': {
 376         next(ls);
 377         if (ls->current != '=') return '=';
 378         else { next(ls); return TK_EQ; }
 379       }
 380       case '<': {
 381         next(ls);
 382         if (ls->current != '=') return '<';
 383         else { next(ls); return TK_LE; }
 384       }
 385       case '>': {
 386         next(ls);
 387         if (ls->current != '=') return '>';
 388         else { next(ls); return TK_GE; }
 389       }
 390       case '~': {
 391         next(ls);
 392         if (ls->current != '=') return '~';
 393         else { next(ls); return TK_NE; }
 394       }
 395       case '"':
 396       case '\'': {
 397         read_string(ls, ls->current, seminfo);
 398         return TK_STRING;
 399       }
 400       case '.': {
 401         save_and_next(ls);
 402         if (check_next(ls, ".")) {
 403           if (check_next(ls, "."))
 404             return TK_DOTS;   /* ... */
 405           else return TK_CONCAT;   /* .. */
 406         }
 407         else if (!isdigit(ls->current)) return '.';
 408         else {
 409           read_numeral(ls, seminfo);
 410           return TK_NUMBER;
 411         }
 412       }
 413       case EOZ: {
 414         return TK_EOS;
 415       }
 416       default: {
 417         if (isspace(ls->current)) {
 418           lua_assert(!currIsNewline(ls));
 419           next(ls);
 420           continue;
 421         }
 422         else if (isdigit(ls->current)) {
 423           read_numeral(ls, seminfo);
 424           return TK_NUMBER;
 425         }
 426         else if (isalpha(ls->current) || ls->current == '_') {
 427           /* identifier or reserved word */
 428           TString *ts;
 429           do {
 430             save_and_next(ls);
 431           } while (isalnum(ls->current) || ls->current == '_');
 432           ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
 433                                   luaZ_bufflen(ls->buff));
 434           if (ts->tsv.reserved > 0)  /* reserved word? */
 435             return ts->tsv.reserved - 1 + FIRST_RESERVED;
 436           else {
 437             seminfo->ts = ts;
 438             return TK_NAME;
 439           }
 440         }
 441         else {
 442           int c = ls->current;
 443           next(ls);
 444           return c;  /* single-char tokens (+ - / ...) */
 445         }
 446       }
 447     }
 448   }
 449 }
 450 
 451 
 452 LUAI_FUNC void luaX_next (LexState *ls) {
 453   ls->lastline = ls->linenumber;
 454   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
 455     ls->t = ls->lookahead;  /* use this one */
 456     ls->lookahead.token = TK_EOS;  /* and discharge it */
 457   }
 458   else
 459     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
 460 }
 461 
 462 
 463 LUAI_FUNC void luaX_lookahead (LexState *ls) {
 464   lua_assert(ls->lookahead.token == TK_EOS);
 465   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
 466 }
 467 

/* [<][>][^][v][top][bottom][index][help] */