詞法分析
有人認為 Lua 的正則運算式功能相當受限。不過 Lua 的優點在於很容易擴充其他功能:我們不需要花俏的正則運算式,因為可以很容易地使用 Peter Bumbulis 的 re2c [1] 來新增詞法分析器。
以下是能辨識 Lua 5.0 語法與關鍵字的詞法掃描器。函式 LexLua 接受要掃描的字串,並傳回執行掃描的函式。傳回的函式是一個閉包 (closure),它繫結了 C 函式 scan 以及兩個 upvalue:要掃描的字串,以及用來追蹤狀態的使用者資料 (userdata)。每次呼叫時,它會傳回下一個語彙基元 (token),或是在到達字串尾端時傳回 nil。
re2c 會將特殊註解中的正則運算式置換為掃描器的程式碼。以下是 re2c 的輸入 [2] 和輸出 [3]。
/*
==============================================================================
  LexLua.c

  re2c specification for a Lua 5.0 lexical scanner exposed to Lua.
  This file is input to re2c; the special comment blocks that begin with
  "!re2c" are replaced by generated scanner code.

  Lua usage:  next = LexLua(source)
              token, type, ws = next()
==============================================================================
*/

#include <stddef.h>

#include "lua.h"
#include "lauxlib.h"

/* Token-type tags returned as the second result of the scan closure.
   File-local so a shared library built from this file does not export
   generic symbols such as `name`. */
static const char *const name    = "<name>";
static const char *const number  = "<number>";
static const char *const literal = "<literal>";

/* re2c interface macros: map re2c's abstract cursor names onto the locals
   of scan().  YYFILL is empty because the whole input is already in memory;
   the END rule below matches a NUL byte to detect the end of the string. */
/* NOTE(review): re2c documentation commonly uses `unsigned char` for
   YYCTYPE; confirm that plain `char` handles bytes >= 0x80 as intended. */
#define YYCTYPE         char
#define YYCURSOR        cursor
#define YYMARKER        marker
#define YYLIMIT         limit
#define YYFILL(n)

/* Store the scan pointers (c, m, l) into Scanner *s as offsets from the
   start of the input string i, so they stay meaningful between calls. */
#define save_state(i,s,c,m,l) do { \
  (s)->cursor = (size_t)((c)-(i)); \
  (s)->marker = (size_t)((m)-(i)); \
  (s)->limit  = (size_t)((l)-(i)); \
} while (0)

/* Persistent scanner position, kept in a userdata upvalue of the closure.
   All fields are byte offsets into the string being scanned. */
typedef struct Scanner {
  size_t cursor, marker, limit;
} Scanner;

/* Return the Scanner stored at the given stack index; raises a Lua error
   if the value there is not a userdata. */
static Scanner *check_Scanner(lua_State *L, int index)
{
  luaL_check_type(L, index, LUA_TUSERDATA);
  return (Scanner*)lua_touserdata(L,index);
}

/*
  Closure body: scan the next token of the string held in upvalue 1, using
  the Scanner state held in upvalue 2.

  Always returns three values:
    token  - text of the token, or nil at end of input
    type   - "<name>", "<number>", "<literal>", or nil for keywords,
             operators and any other single character
    ws     - the text skipped before the token (white space and comments)

  Raises a Lua error on an unfinished long string or long comment, or if
  a NUL byte is found before the end of the input.
*/
static int scan (lua_State *L)
{
  const char *input = luaL_check_string(L, lua_upvalueindex(1));
  Scanner *state = check_Scanner(L,lua_upvalueindex(2));
  char *cursor = (char*)input + state->cursor;
  char *marker = (char*)input + state->marker;
  char *limit  = (char*)input + state->limit;
  char *white_space, *token;
  const char *ret = 0;     /* type tag to return; 0 means nil */
  int nest_count = 0;      /* nesting depth inside [[ ... ]] */

/*!re2c
  D        = [0-9] ;
  E        = [Ee] [+-]? D+ ;
  L        = [a-zA-Z_] ;
  NUMBER   = ( D+ | D* "." D+ | D+ "." D* ) E? ;
  WS       = [ \t\n\v\f]+ ;
  LF       = [\n] ;
  END      = [\000] ;
  ANY      = [\000-\377] \ END ;
  ESC      = [\\] ;
  SQ       = ['] ;
  DQ       = ["] ;
  STRING1  = SQ ( ANY \ SQ \ ESC | ESC ANY )* SQ ;
  STRING2  = DQ ( ANY \ DQ \ ESC | ESC ANY )* DQ ;
*/

Begin:
  white_space = cursor;  /* start of white space */
Space:
  token = cursor;        /* start of token */

/*!re2c
  WS               { goto Space; }
  "--[["           { nest_count=0; goto LongComment; }
  "--" | "#"       { goto Comment; }
  "and"            { goto Return; }
  "break"          { goto Return; }
  "do"             { goto Return; }
  "else"           { goto Return; }
  "elseif"         { goto Return; }
  "end"            { goto Return; }
  "false"          { goto Return; }
  "for"            { goto Return; }
  "function"       { goto Return; }
  "global"         { goto Return; }
  "if"             { goto Return; }
  "in"             { goto Return; }
  "local"          { goto Return; }
  "nil"            { goto Return; }
  "not"            { goto Return; }
  "or"             { goto Return; }
  "repeat"         { goto Return; }
  "return"         { goto Return; }
  "then"           { goto Return; }
  "true"           { goto Return; }
  "until"          { goto Return; }
  "while"          { goto Return; }
  "..."            { goto Return; }
  ".."             { goto Return; }
  "=="             { goto Return; }
  ">="             { goto Return; }
  "<="             { goto Return; }
  "~="             { goto Return; }
  "[["             { nest_count=0; goto LongString; }
  L ( L | D )*     { ret = name;    goto Return; }
  NUMBER           { ret = number;  goto Return; }
  STRING1          { ret = literal; goto Return; }
  STRING2          { ret = literal; goto Return; }
  ANY              { goto Return; }
  END              { goto TheEnd; }
*/

  /* Inside [[ ... ]]: consume up to the matching "]]", tracking nesting. */
LongString:
/*!re2c
  "[["             { nest_count++; goto LongString; }
  "]]"             { if( nest_count == 0 ) { ret = literal; goto Return; }
                     nest_count--; goto LongString; }
  ANY              { goto LongString; }
  END              { luaL_error(L,"unfinished long string"); }
*/

  /* Line comment: skip to the end of the line, then resume at Space so the
     comment text is reported as part of the skipped white space. */
Comment:
/*!re2c
  ( ANY \ LF )*    { goto Space; }
  END              { goto TheEnd; }
*/

  /* Inside --[[ ... ]]: same nesting rule as long strings, but the text is
     skipped rather than returned as a token. */
LongComment:
/*!re2c
  "[["             { nest_count++; goto LongComment; }
  "]]"             { if( nest_count == 0 ) goto Space;
                     nest_count--; goto LongComment; }
  ANY              { goto LongComment; }
  END              { luaL_error(L,"unfinished long comment"); }
*/

  luaL_error(L,"impossible"); /* die */

TheEnd:
  /* The END rule consumed a NUL byte; step back onto it.  It must be the
     byte at `limit`, otherwise the NUL was embedded in the input. */
  if( --cursor != limit ) luaL_error(L,"didn't reach end of input"); /* die */
  lua_pushnil(L);
  lua_pushnil(L);
  lua_pushlstring(L, white_space, token - white_space );
  save_state(input,state,cursor,marker,limit);
  return 3; /* nil, nil, ws */

Return:
  lua_pushlstring(L, token, cursor - token );
  if( ret ) lua_pushstring(L, ret ); else lua_pushnil(L);
  lua_pushlstring(L, white_space, token - white_space );
  save_state(input,state,cursor,marker,limit);
  return 3; /* token, type, ws */
}

/*
  Lua function LexLua(source): validate that argument 1 is a string and
  return a closure over scan() whose upvalues are that string and a fresh
  Scanner positioned at its start.
*/
static int scanner (lua_State *L)
{
  Scanner *s;
  size_t len;   /* luaL_check_lstr writes the length through a size_t pointer */

  luaL_check_lstr(L, 1, &len);
  /* Drop any extra arguments so the two values captured by
     lua_pushcclosure are exactly the source string and the userdata. */
  lua_settop(L, 1);

  s = (Scanner*)lua_newuserdata(L, sizeof(Scanner));
  s->cursor = 0;
  s->marker = 0;
  s->limit  = len;
  lua_pushcclosure(L, scan, 2); /* string, userdata */
  return 1;
}

/* Library entry point: register the global function "LexLua". */
int openLexLua (lua_State *L)
{
  lua_register(L, "LexLua", scanner);
  return 0;
}
這段程式碼可以依下列方式編譯成 Unix 共用程式庫:
# Generate the C scanner from the re2c specification.
re2c -s LexLua.c > lex.c

# Compile position-independent code and link it into a shared library.
gcc -fPIC -g -c lex.c -o lexlua.o
gcc -g -shared -Wl,-soname,liblexlua.so -o liblexlua.so.1.0.0 lexlua.o -L/usr/local/lib/ -llua -llualib

# Install under /usr/local/lib; the remaining commands are typed in the
# root shell that `su` opens.
su
cp liblexlua.so.1.0.0 /usr/local/lib
cd /usr/local/lib
ln -s liblexlua.so.1.0.0 liblexlua.so
ldconfig -v /usr/local/lib
$ lua
Lua 5.0 (alpha) Copyright (C) 1994-2002 Tecgraf, PUC-Rio
> assert(loadlib('/usr/local/lib/liblexlua.so','openLexLua'))()
> for tok, tt in LexLua[[ for i = 1,10 do print(i*2) end ]] do print(tok,tt) end
for     nil
i       <name>
=       nil
1       <number>
,       nil
10      <number>
do      nil
print   <name>
(       nil
i       <name>
*       nil
2       <number>
)       nil
end     nil
>
如需為 Lua 程式碼進行語法上色的範例,請參閱 [4] 或 LuaToHtml。