
lua-users home


有人認為 Lua 的正規表示式功能有限。不過 Lua 的優點正是容易擴充:我們不需要功能繁複的正規表示式,因為可以輕鬆地利用 Peter Bumbulis 的 re2c [1] 為 Lua 加上詞法分析器。

以下是可辨識 Lua 5.0 語法與關鍵字的詞法掃描器。函式 LexLua 接受要掃描的字串,並傳回一個執行掃描的函式。這個傳回的函式是一個閉包 (closure),由 C 函式 scan 加上兩個 upvalue 組成:要掃描的字串,以及記錄掃描狀態的 userdata。每次呼叫時,它會傳回下一個語彙單元 (token);到達字串結尾時則傳回 nil。

C 程式碼

re2c 會將特殊註解中的正規表示式替換為掃描器的程式碼。以下是 re2c 的輸入 [2] 和輸出 [3]。


#include "lua.h"
#include "lauxlib.h"

/* Token type tags pushed as the second result of scan().  Tokens that set
 * no tag (keywords, operators, any other single character) get nil. */
const char *name    = "<name>";
const char *number  = "<number>";
const char *literal = "<literal>";

/* Names used by the scanner code that re2c generates, mapped onto the
 * local variables of scan() (cursor, marker, limit).  YYFILL expands to
 * nothing, so no buffer refill is ever performed.
 * NOTE(review): presumably this relies on the input being NUL-terminated,
 * since the END rule matches \000 -- confirm against the re2c manual. */
#define YYCTYPE  char
#define YYCURSOR cursor
#define YYMARKER marker
#define YYLIMIT  limit
#define YYFILL(n)

/*
 * save_state(i, s, c, m, l) -- store scan pointers back into a Scanner.
 *
 *   i        start of the input string
 *   s        pointer to the Scanner to update
 *   c, m, l  cursor, marker and limit pointers into the same string
 *
 * Each pointer is stored as an int offset from i.  The expansion is a
 * brace-enclosed block, so it can be used wherever a statement is allowed.
 */
#define save_state(i,s,c,m,l) {\
  (s)->cursor = (int)((c)-(i)); \
  (s)->marker = (int)((m)-(i)); \
  (s)->limit  = (int)((l)-(i)); \
}

/*
 * Scan position kept in a userdata between calls to scan().
 * All three fields are offsets from the start of the input string.
 */
typedef struct Scanner {
  int cursor;   /* where the next scan starts */
  int marker;   /* offset backing the YYMARKER pointer */
  int limit;    /* end of the input; initialised to the string length */
} Scanner;

/*
 * Return the Scanner stored in the userdata at the given stack index.
 *
 * luaL_check_type raises a Lua error when the value is not a userdata.
 * Nothing here verifies that the userdata really holds a Scanner; the
 * only caller passes the closure's own upvalue.
 */
static Scanner *check_Scanner(lua_State *L, int index)
{
  luaL_check_type(L, index, LUA_TUSERDATA);
  return (Scanner*)lua_touserdata(L,index);
}

/*
 * scan -- body of the closure returned by LexLua; yields the next token.
 *
 * Upvalues:
 *   1  the string being scanned
 *   2  Scanner userdata holding cursor/marker/limit as offsets into it
 *
 * Returns three values: token text, token type, preceding white space.
 * The type is "<name>", "<number>" or "<literal>", and nil for keywords,
 * operators and any other single character.  Comments are skipped and end
 * up inside the white-space result.  At the end of the input the results
 * are nil, nil, trailing white space.
 *
 * Raises a Lua error on an unfinished long string or long comment, and on
 * the two internal consistency checks ("impossible", "didn't reach end of
 * input").
 *
 * This is re2c input rather than plain C: every special comment that
 * opens with !re2c is replaced by generated matching code, and each rule
 * action leaves its block through a goto.
 */
static int scan (lua_State *L)
{
  const char *input = luaL_check_string(L, lua_upvalueindex(1));
  Scanner    *state = check_Scanner(L,lua_upvalueindex(2));
  char *cursor = (char*)input + state->cursor;
  char *marker = (char*)input + state->marker;
  char *limit  = (char*)input + state->limit;
  char *white_space, *token;
  const char *ret = 0;
  int nest_count = 0;

/*!re2c
  D        = [0-9] ;
  E        = [Ee] [+-]? D+ ;
  L        = [a-zA-Z_] ;

  NUMBER   = ( D+ | D* "." D+ | D+ "." D* ) E? ;

  WS       = [ \t\n\v\f]+ ;
  LF       = [\n] ;
  END      = [\000] ;
  ANY      = [\000-\377] \ END ;

  ESC      = [\\] ;
  SQ       = ['] ;
  DQ       = ["] ;

  STRING1  = SQ ( ANY \ SQ \ ESC | ESC ANY )* SQ ;
  STRING2  = DQ ( ANY \ DQ \ ESC | ESC ANY )* DQ ;
*/

  white_space = cursor; /* start of white space */

  /* White space and comments jump back here: only the token start moves,
     so white_space..token keeps covering everything that was skipped. */
Space:
  token = cursor;       /* start of token */

/*!re2c
  WS               { goto Space; }
  "--[["           { nest_count=0; goto LongComment; }
  "--" | "#"       { goto Comment; }
  "and"            { goto Return; }
  "break"          { goto Return; }
  "do"             { goto Return; }
  "else"           { goto Return; }
  "elseif"         { goto Return; }
  "end"            { goto Return; }
  "false"          { goto Return; }
  "for"            { goto Return; }
  "function"       { goto Return; }
  "global"         { goto Return; }
  "if"             { goto Return; }
  "in"             { goto Return; }
  "local"          { goto Return; }
  "nil"            { goto Return; }
  "not"            { goto Return; }
  "or"             { goto Return; }
  "repeat"         { goto Return; }
  "return"         { goto Return; }
  "then"           { goto Return; }
  "true"           { goto Return; }
  "until"          { goto Return; }
  "while"          { goto Return; }
  "..."            { goto Return; }
  ".."             { goto Return; }
  "=="             { goto Return; }
  ">="             { goto Return; }
  "<="             { goto Return; }
  "~="             { goto Return; }
  "[["             { nest_count=0; goto LongString; }

  L ( L | D )*     { ret = name;    goto Return; }
  NUMBER           { ret = number;  goto Return; }
  STRING1          { ret = literal; goto Return; }
  STRING2          { ret = literal; goto Return; }

  ANY              { goto Return; }
  END              { goto TheEnd; }
*/

  /* Inside [[ ... ]]; nest_count tracks nested [[ ]] pairs. */
LongString:
/*!re2c
  "[["             { nest_count++; goto LongString; }
  "]]"             { if( nest_count == 0 ) { ret = literal; goto Return; }
                     nest_count--; goto LongString; }

  ANY              { goto LongString; }
  END              { luaL_error(L,"unfinished long string"); }
*/

  /* Rest of a one-line comment, up to but not including the line feed. */
Comment:
/*!re2c
  ( ANY \ LF )*    { goto Space; }
  END              { goto TheEnd; }
*/

  /* Inside --[[ ... ]]; nesting is handled as for long strings. */
LongComment:
/*!re2c
  "[["             { nest_count++; goto LongComment; }
  "]]"             { if( nest_count == 0 ) goto Space;
                     nest_count--; goto LongComment; }

  ANY              { goto LongComment; }
  END              { luaL_error(L,"unfinished long comment"); }
*/

  luaL_error(L,"impossible"); /* die */

TheEnd:
  /* END consumed the terminating NUL; step back onto it and make sure it
     is the real end of the string rather than an embedded NUL. */
  if( --cursor != limit ) luaL_error(L,"didn't reach end of input"); /* die */
  /* Push all three results explicitly so that "return 3" does not depend
     on whatever arguments the caller left on the stack. */
  lua_pushnil(L);
  lua_pushnil(L);
  lua_pushlstring(L, white_space, token - white_space );
  return 3; /* nil, nil, ws */

Return:
  /* Write the position back into the userdata (as offsets from input) so
     that the next call resumes after this token. */
  state->cursor = (int)(cursor - input);
  state->marker = (int)(marker - input);
  state->limit  = (int)(limit  - input);
  lua_pushlstring(L, token, cursor - token );
  if( ret ) lua_pushstring(L, ret );
  else lua_pushnil(L);
  lua_pushlstring(L, white_space, token - white_space );
  return 3; /* token, type, ws */
}

/*
 * scanner -- Lua-callable constructor registered as LexLua.
 *
 * Argument 1 must be a string (luaL_check_lstr raises an error otherwise).
 * Returns one value: a closure over scan whose upvalues are that string
 * and a fresh Scanner userdata positioned at offset 0.
 */
static int scanner (lua_State *L)
{
  Scanner *s;
  size_t len;   /* luaL_check_lstr reports the length through a size_t* */
  (void)luaL_check_lstr(L, 1, &len);
  /* Drop any extra arguments so that the string and the userdata are
     exactly the two values captured by lua_pushcclosure below. */
  lua_settop(L, 1);
  s = (Scanner*)lua_newuserdata(L, sizeof(Scanner));
  s->cursor = 0;
  s->marker = 0;
  s->limit  = (int)len;
  lua_pushcclosure(L, scan, 2); /* string, userdata */
  return 1;
}

/*
 * Library entry point: registers scanner as the global function "LexLua".
 * Pushes no results.
 */
int openLexLua (lua_State *L)
{
  lua_register(L, "LexLua", scanner);
  return 0;
}


這份程式碼可依下列方式編譯成 Unix 共用程式庫 (shared library):

re2c -s LexLua.c > lex.c
gcc -fPIC -g -c lex.c -o lexlua.o
gcc -g -shared -Wl,-soname,liblexlua.so -o liblexlua.so.1.0.0 lexlua.o -L/usr/local/lib/ -llua -llualib

cp liblexlua.so.1.0.0 /usr/local/lib
cd /usr/local/lib
ln -s liblexlua.so.1.0.0 liblexlua.so
ldconfig -v /usr/local/lib

Lua 測試程式碼

$ lua
Lua 5.0 (alpha)  Copyright (C) 1994-2002 Tecgraf, PUC-Rio
> assert(loadlib('/usr/local/lib/liblexlua.so','openLexLua'))()
> for tok, tt in LexLua[[ for i = 1,10 do print(i*2) end ]] do print(tok,tt) end
for     nil
i       <name>
=       nil
1       <number>
,       nil
10      <number>
do      nil
print   <name>
(       nil
i       <name>
*       nil
2       <number>
)       nil
end     nil

如需為 Lua 程式碼進行語法上色的範例,請參閱 LuaToHtml [4]。

最新異動 · 喜好設定
編輯 · 歷史記錄
最後修改時間為 2008 年 8 月 6 日上午 9:41 GMT (格林威治時間) (相異處)