00001
00002
00003
00004
00005
00006
00007
00008 #include <ctype.h>
00009 #include <locale.h>
00010 #include <string.h>
00011
00012 #define llex_c
00013 #define LUA_CORE
00014
00015 #include "lua.h"
00016
00017 #include "ldo.h"
00018 #include "llex.h"
00019 #include "lobject.h"
00020 #include "lparser.h"
00021 #include "lstate.h"
00022 #include "lstring.h"
00023 #include "ltable.h"
00024 #include "lzio.h"
00025
00026
00027
/*
** Advance the lexer by one character: fetch the next character from the
** input stream `z' with zgetc and store it in `current'.  The expression
** yields the character just stored.  The parameter is parenthesized so
** that any pointer-valued expression (e.g. `&state') may be passed.
*/
#define next(ls) ((ls)->current = zgetc((ls)->z))
00029
00030
00031
00032
/*
** True when the current character is a line-break character ('\n' or
** '\r').  The parameter is parenthesized so that any pointer-valued
** expression (e.g. `&state') may be passed, not only a plain identifier.
*/
#define currIsNewline(ls) ((ls)->current == '\n' || (ls)->current == '\r')
00034
00035
00036
/*
** Printable names of the tokens, indexed by (token - FIRST_RESERVED)
** (see luaX_token2str).  The reserved words come first: luaX_init
** registers the first NUM_RESERVED entries as reserved words.  The array
** is terminated by NULL.
*/
const char *const luaX_tokens [] = {
  /* reserved words */
  "and", "break", "do",
  "else", "elseif", "end",
  "false", "for", "function",
  "if", "in", "local",
  "nil", "not", "or",
  "repeat", "return", "then",
  "true", "until", "while",
  /* multi-character operators */
  "..", "...",
  "==", ">=", "<=", "~=",
  /* names used for the remaining token kinds */
  "<number>", "<name>", "<string>", "<eof>",
  NULL
};
00046
00047
/*
** Append the current character to the token buffer (save) and then
** advance to the next input character (next).  The parameter is
** parenthesized at every use -- including when it is handed on to
** `next' -- so that any pointer-valued expression may be passed.
*/
#define save_and_next(ls) (save((ls), (ls)->current), next((ls)))
00049
00050
00051 static void save (LexState *ls, int c) {
00052 Mbuffer *b = ls->buff;
00053 if (b->n + 1 > b->buffsize) {
00054 size_t newsize;
00055 if (b->buffsize >= MAX_SIZET/2)
00056 luaX_lexerror(ls, "lexical element too long", 0);
00057 newsize = b->buffsize * 2;
00058 luaZ_resizebuffer(ls->L, b, newsize);
00059 }
00060 b->buffer[b->n++] = cast(char, c);
00061 }
00062
00063
00064 void luaX_init (lua_State *L) {
00065 int i;
00066 for (i=0; i<NUM_RESERVED; i++) {
00067 TString *ts = luaS_new(L, luaX_tokens[i]);
00068 luaS_fix(ts);
00069 lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN);
00070 ts->tsv.reserved = cast_byte(i+1);
00071 }
00072 }
00073
00074
/* size of the buffer that luaX_lexerror uses to hold the chunk id */
#define MAXSRC          80
00076
00077
00078 const char *luaX_token2str (LexState *ls, int token) {
00079 if (token < FIRST_RESERVED) {
00080 lua_assert(token == cast(unsigned char, token));
00081 return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) :
00082 luaO_pushfstring(ls->L, "%c", token);
00083 }
00084 else
00085 return luaX_tokens[token-FIRST_RESERVED];
00086 }
00087
00088
00089 static const char *txtToken (LexState *ls, int token) {
00090 switch (token) {
00091 case TK_NAME:
00092 case TK_STRING:
00093 case TK_NUMBER:
00094 save(ls, '\0');
00095 return luaZ_buffer(ls->buff);
00096 default:
00097 return luaX_token2str(ls, token);
00098 }
00099 }
00100
00101
00102 void luaX_lexerror (LexState *ls, const char *msg, int token) {
00103 char buff[MAXSRC];
00104 luaO_chunkid(buff, getstr(ls->source), MAXSRC);
00105 msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
00106 if (token)
00107 luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));
00108 luaD_throw(ls->L, LUA_ERRSYNTAX);
00109 }
00110
00111
00112 void luaX_syntaxerror (LexState *ls, const char *msg) {
00113 luaX_lexerror(ls, msg, ls->t.token);
00114 }
00115
00116
00117 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
00118 lua_State *L = ls->L;
00119 TString *ts = luaS_newlstr(L, str, l);
00120 TValue *o = luaH_setstr(L, ls->fs->h, ts);
00121 if (ttisnil(o))
00122 setbvalue(o, 1);
00123 return ts;
00124 }
00125
00126
00127 static void inclinenumber (LexState *ls) {
00128 int old = ls->current;
00129 lua_assert(currIsNewline(ls));
00130 next(ls);
00131 if (currIsNewline(ls) && ls->current != old)
00132 next(ls);
00133 if (++ls->linenumber >= MAX_INT)
00134 luaX_syntaxerror(ls, "chunk has too many lines");
00135 }
00136
00137
00138 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
00139 ls->decpoint = '.';
00140 ls->L = L;
00141 ls->lookahead.token = TK_EOS;
00142 ls->z = z;
00143 ls->fs = NULL;
00144 ls->linenumber = 1;
00145 ls->lastline = 1;
00146 ls->source = source;
00147 luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);
00148 next(ls);
00149 }
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161 static int check_next (LexState *ls, const char *set) {
00162 if (!strchr(set, ls->current))
00163 return 0;
00164 save_and_next(ls);
00165 return 1;
00166 }
00167
00168
00169 static void buffreplace (LexState *ls, char from, char to) {
00170 size_t n = luaZ_bufflen(ls->buff);
00171 char *p = luaZ_buffer(ls->buff);
00172 while (n--)
00173 if (p[n] == from) p[n] = to;
00174 }
00175
00176
00177 static void trydecpoint (LexState *ls, SemInfo *seminfo) {
00178
00179 struct lconv *cv = localeconv();
00180 char old = ls->decpoint;
00181 ls->decpoint = (cv ? cv->decimal_point[0] : '.');
00182 buffreplace(ls, old, ls->decpoint);
00183 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) {
00184
00185 buffreplace(ls, ls->decpoint, '.');
00186 luaX_lexerror(ls, "malformed number", TK_NUMBER);
00187 }
00188 }
00189
00190
00191
00192 static void read_numeral (LexState *ls, SemInfo *seminfo) {
00193 lua_assert(isdigit(ls->current));
00194 do {
00195 save_and_next(ls);
00196 } while (isdigit(ls->current) || ls->current == '.');
00197 if (check_next(ls, "Ee"))
00198 check_next(ls, "+-");
00199 while (isalnum(ls->current) || ls->current == '_')
00200 save_and_next(ls);
00201 save(ls, '\0');
00202 buffreplace(ls, '.', ls->decpoint);
00203 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))
00204 trydecpoint(ls, seminfo);
00205 }
00206
00207
00208 static int skip_sep (LexState *ls) {
00209 int count = 0;
00210 int s = ls->current;
00211 lua_assert(s == '[' || s == ']');
00212 save_and_next(ls);
00213 while (ls->current == '=') {
00214 save_and_next(ls);
00215 count++;
00216 }
00217 return (ls->current == s) ? count : (-count) - 1;
00218 }
00219
00220
00221 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
00222 int cont = 0;
00223 (void)(cont);
00224 save_and_next(ls);
00225 if (currIsNewline(ls))
00226 inclinenumber(ls);
00227 for (;;) {
00228 switch (ls->current) {
00229 case EOZ:
00230 luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
00231 "unfinished long comment", TK_EOS);
00232 break;
00233 #if defined(LUA_COMPAT_LSTR)
00234 case '[': {
00235 if (skip_sep(ls) == sep) {
00236 save_and_next(ls);
00237 cont++;
00238 #if LUA_COMPAT_LSTR == 1
00239 if (sep == 0)
00240 luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
00241 #endif
00242 }
00243 break;
00244 }
00245 #endif
00246 case ']': {
00247 if (skip_sep(ls) == sep) {
00248 save_and_next(ls);
00249 #if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
00250 cont--;
00251 if (sep == 0 && cont >= 0) break;
00252 #endif
00253 goto endloop;
00254 }
00255 break;
00256 }
00257 case '\n':
00258 case '\r': {
00259 save(ls, '\n');
00260 inclinenumber(ls);
00261 if (!seminfo) luaZ_resetbuffer(ls->buff);
00262 break;
00263 }
00264 default: {
00265 if (seminfo) save_and_next(ls);
00266 else next(ls);
00267 }
00268 }
00269 } endloop:
00270 if (seminfo)
00271 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
00272 luaZ_bufflen(ls->buff) - 2*(2 + sep));
00273 }
00274
00275
00276 static void read_string (LexState *ls, int del, SemInfo *seminfo) {
00277 save_and_next(ls);
00278 while (ls->current != del) {
00279 switch (ls->current) {
00280 case EOZ:
00281 luaX_lexerror(ls, "unfinished string", TK_EOS);
00282 continue;
00283 case '\n':
00284 case '\r':
00285 luaX_lexerror(ls, "unfinished string", TK_STRING);
00286 continue;
00287 case '\\': {
00288 int c;
00289 next(ls);
00290 switch (ls->current) {
00291 case 'a': c = '\a'; break;
00292 case 'b': c = '\b'; break;
00293 case 'f': c = '\f'; break;
00294 case 'n': c = '\n'; break;
00295 case 'r': c = '\r'; break;
00296 case 't': c = '\t'; break;
00297 case 'v': c = '\v'; break;
00298 case '\n':
00299 case '\r': save(ls, '\n'); inclinenumber(ls); continue;
00300 case EOZ: continue;
00301 default: {
00302 if (!isdigit(ls->current))
00303 save_and_next(ls);
00304 else {
00305 int i = 0;
00306 c = 0;
00307 do {
00308 c = 10*c + (ls->current-'0');
00309 next(ls);
00310 } while (++i<3 && isdigit(ls->current));
00311 if (c > UCHAR_MAX)
00312 luaX_lexerror(ls, "escape sequence too large", TK_STRING);
00313 save(ls, c);
00314 }
00315 continue;
00316 }
00317 }
00318 save(ls, c);
00319 next(ls);
00320 continue;
00321 }
00322 default:
00323 save_and_next(ls);
00324 }
00325 }
00326 save_and_next(ls);
00327 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
00328 luaZ_bufflen(ls->buff) - 2);
00329 }
00330
00331
00332 static int llex (LexState *ls, SemInfo *seminfo) {
00333 luaZ_resetbuffer(ls->buff);
00334 for (;;) {
00335 switch (ls->current) {
00336 case '\n':
00337 case '\r': {
00338 inclinenumber(ls);
00339 continue;
00340 }
00341 case '-': {
00342 next(ls);
00343 if (ls->current != '-') return '-';
00344
00345 next(ls);
00346 if (ls->current == '[') {
00347 int sep = skip_sep(ls);
00348 luaZ_resetbuffer(ls->buff);
00349 if (sep >= 0) {
00350 read_long_string(ls, NULL, sep);
00351 luaZ_resetbuffer(ls->buff);
00352 continue;
00353 }
00354 }
00355
00356 while (!currIsNewline(ls) && ls->current != EOZ)
00357 next(ls);
00358 continue;
00359 }
00360 case '[': {
00361 int sep = skip_sep(ls);
00362 if (sep >= 0) {
00363 read_long_string(ls, seminfo, sep);
00364 return TK_STRING;
00365 }
00366 else if (sep == -1) return '[';
00367 else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
00368 }
00369 case '=': {
00370 next(ls);
00371 if (ls->current != '=') return '=';
00372 else { next(ls); return TK_EQ; }
00373 }
00374 case '<': {
00375 next(ls);
00376 if (ls->current != '=') return '<';
00377 else { next(ls); return TK_LE; }
00378 }
00379 case '>': {
00380 next(ls);
00381 if (ls->current != '=') return '>';
00382 else { next(ls); return TK_GE; }
00383 }
00384 case '~': {
00385 next(ls);
00386 if (ls->current != '=') return '~';
00387 else { next(ls); return TK_NE; }
00388 }
00389 case '"':
00390 case '\'': {
00391 read_string(ls, ls->current, seminfo);
00392 return TK_STRING;
00393 }
00394 case '.': {
00395 save_and_next(ls);
00396 if (check_next(ls, ".")) {
00397 if (check_next(ls, "."))
00398 return TK_DOTS;
00399 else return TK_CONCAT;
00400 }
00401 else if (!isdigit(ls->current)) return '.';
00402 else {
00403 read_numeral(ls, seminfo);
00404 return TK_NUMBER;
00405 }
00406 }
00407 case EOZ: {
00408 return TK_EOS;
00409 }
00410 default: {
00411 if (isspace(ls->current)) {
00412 lua_assert(!currIsNewline(ls));
00413 next(ls);
00414 continue;
00415 }
00416 else if (isdigit(ls->current)) {
00417 read_numeral(ls, seminfo);
00418 return TK_NUMBER;
00419 }
00420 else if (isalpha(ls->current) || ls->current == '_') {
00421
00422 TString *ts;
00423 do {
00424 save_and_next(ls);
00425 } while (isalnum(ls->current) || ls->current == '_');
00426 ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
00427 luaZ_bufflen(ls->buff));
00428 if (ts->tsv.reserved > 0)
00429 return ts->tsv.reserved - 1 + FIRST_RESERVED;
00430 else {
00431 seminfo->ts = ts;
00432 return TK_NAME;
00433 }
00434 }
00435 else {
00436 int c = ls->current;
00437 next(ls);
00438 return c;
00439 }
00440 }
00441 }
00442 }
00443 }
00444
00445
00446 void luaX_next (LexState *ls) {
00447 ls->lastline = ls->linenumber;
00448 if (ls->lookahead.token != TK_EOS) {
00449 ls->t = ls->lookahead;
00450 ls->lookahead.token = TK_EOS;
00451 }
00452 else
00453 ls->t.token = llex(ls, &ls->t.seminfo);
00454 }
00455
00456
00457 void luaX_lookahead (LexState *ls) {
00458 lua_assert(ls->lookahead.token == TK_EOS);
00459 ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
00460 }
00461