[Lua_Trace] 1. Lua數據結構 - TString

摘要:[Lua_Trace] 1. Lua數據結構 - TString

此源碼分析的版本為 : Lua 5.3.1

2. TString : 字串結構


//lobject.h

/*
** Header for string value; string bytes follow the end of this structure
** (aligned according to 'UTString'; see next).
*/
typedef struct TString {
  CommonHeader;  /* header fields common to collectable objects (macro defined elsewhere) */
  lu_byte extra;  /* reserved words for short strings; "has hash" for longs */
  lu_byte shrlen;  /* length for short strings */
  unsigned int hash;  /* hash value; long strings are created with the global seed here */
  union {
    size_t lnglen;  /* length for long strings */
    struct TString *hnext;  /* linked list for hash table */
  } u;  /* only one member is meaningful, depending on the string kind */
} TString;

TString結構由上而下的元素共有:

CommonHeader : GCObject的共有定義。

extra : 短字符串(LUA_TSHRSTR)下,非0之數值表示GC不回收的保留字,如下luaX_tokens定義了Lua所有的保留字。


//llex.c

/* ORDER RESERVED */
/*
** Printable names of the lexer tokens: the reserved words first, then
** the multi-character operators, and finally the placeholder names used
** for non-literal tokens ("<eof>", "<number>", ...).
** 'luaX_init' walks the leading reserved-word entries of this table.
*/
static const char *const luaX_tokens [] = {
    "and", "break", "do", "else", "elseif",
    "end", "false", "for", "function", "goto", "if",
    "in", "local", "nil", "not", "or", "repeat",
    "return", "then", "true", "until", "while",
    "//", "..", "...", "==", ">=", "<=", "~=",
    "<<", ">>", "::", "<eof>",
    "<number>", "<integer>", "<name>", "<string>"
};

/*
** Lexer initialization: creates the environment-name string and one
** string per reserved word, marking all of them so that the collector
** never frees them. Each reserved word stores its 1-based index into
** 'luaX_tokens' in its 'extra' field, so a non-zero 'extra' on a short
** string identifies it as a reserved word.
*/
void luaX_init (lua_State *L) {
  int i;
  TString *e = luaS_newliteral(L, LUA_ENV);  /* create env name */
  luaC_fix(L, obj2gco(e));  /* never collect this name */
  for (i=0; i<NUM_RESERVED; i++) {
    TString *ts = luaS_new(L, luaX_tokens[i]);
    luaC_fix(L, obj2gco(ts));  /* reserved words are never collected */
    ts->extra = cast_byte(i+1);  /* reserved word */
  }
}

extra : 長字符串(LUA_TLNGSTR)下則表示該字串是否已經計算過hash值(即結構註解中的"has hash"),已計算過則設定為1。

shrlen : 字串長度 (LUA_TSHRSTR使用)。

hash : 雜湊值,由雜湊演算法產生。

u : LUA_TLNGSTR下表示為字串長度(lnglen),LUA_TSHRSTR則表示為hash table的鏈結(*hnext)。

 

創建TString時,Lua會在TString結構(依UTString對齊;在32位元平台上約為16 bytes,實際大小視平台而定)之後緊接著配置一塊空間存儲字符串數據(含結尾的'\0'),這樣的行為可由如下源碼中察覺。


//lstring.c

*
** creates a new string object
*/
static TString *createstrobj (lua_State *L, const char *str, size_t l,
                              int tag, unsigned int h) {
  TString *ts;
  GCObject *o;
  size_t totalsize;  /* total size of TString object */
  totalsize = sizelstring(l);
  o = luaC_newobj(L, tag, totalsize);
  ts = gco2ts(o);
  ts->hash = h;
  ts->extra = 0;
  memcpy(getaddrstr(ts), str, l * sizeof(char));
  getaddrstr(ts)[l] = '\0';  /* ending 0 */
  return ts;
}

換句話說,欲取得字符串的方法可透過如下方式:


TString* p;
char* str=(char*)(p+1); //TString的字符串指針

Lua有針對短字符串(LUA_TSHRSTR : 長度小於等於LUAI_MAXSHORTLEN,預設為40)採取節省記憶體與效能優化的措施:當多個短字符串內容皆相同時,它們共用同一份TString物件。實現方式是先由字串計算出hash值,再到全域hash表(g->strt)對應的位置,沿著hnext鏈結逐一比對長度與內容;如果已存在相同的字串,就直接回傳該物件(若該物件已被標記為dead但尚未被回收,則先將其復活),反之,Lua會創建一個新的TString並放入hash表中對應的位置。


//lstring.c

/*
** new string (with explicit length)
**
** Strings of up to LUAI_MAXSHORTLEN bytes go through 'internshrstr'.
** Longer ones get a fresh object on every call: they are created with
** the global seed in the 'hash' field and their length is stored in
** 'u.lnglen'. A length too large for the size computation is reported
** through 'luaM_toobig'.
*/
TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
  if (l <= LUAI_MAXSHORTLEN)  /* short string? */
    return internshrstr(L, str, l);
  else {
    TString *ts;
    /* written as 'l >= limit' rather than 'l + 1 > limit': the two are
       equivalent, except that 'l + 1' wraps around to 0 when 'l' is the
       largest size_t value, which would let the check pass silently */
    if (l >= (MAX_SIZE - sizeof(TString))/sizeof(char))
      luaM_toobig(L);
    ts = createstrobj(L, str, l, LUA_TLNGSTR, G(L)->seed);
    ts->u.lnglen = l;
    return ts;
  }
}

/*
** Interning of short strings: searches the bucket of the global string
** table selected by the string's hash; an entry with the same length
** and bytes is returned as is (after being resurrected if it is dead
** but not yet collected). Otherwise a new short string is created and
** linked at the head of its bucket, doubling the table first when the
** number of entries has reached the number of buckets.
*/
static TString *internshrstr (lua_State *L, const char *str, size_t l) {
  global_State *g = G(L);
  unsigned int h = luaS_hash(str, l, g->seed);
  TString **bucket = &g->strt.hash[lmod(h, g->strt.size)];
  TString *node = *bucket;
  TString *fresh;
  while (node != NULL) {  /* walk the collision chain */
    int same = (l == node->shrlen) &&
               (memcmp(str, getstr(node), l * sizeof(char)) == 0);
    if (same) {
      if (isdead(g, node))  /* dead (but not collected yet)? */
        changewhite(node);  /* bring it back to life */
      return node;
    }
    node = node->u.hnext;
  }
  /* not found: grow the table if it is full and can still be doubled */
  if (g->strt.nuse >= g->strt.size && g->strt.size <= MAX_INT/2) {
    luaS_resize(L, g->strt.size * 2);
    bucket = &g->strt.hash[lmod(h, g->strt.size)];  /* bucket for new size */
  }
  fresh = createstrobj(L, str, l, LUA_TSHRSTR, h);
  fresh->shrlen = cast_byte(l);
  fresh->u.hnext = *bucket;  /* insert at the head of the chain */
  *bucket = fresh;
  g->strt.nuse++;
  return fresh;
}

有任何錯誤請指正,後續我將盡可能的再補充詳細