Skip to content

Commit

Permalink
Switch GET_BYTE_FN to LUA_LOAD_BYTE_FN
Browse files Browse the repository at this point in the history
This is intended to be a more portable abstraction: rather than
hard-coding offsets in the macro invocations, it relies on the existence
of offsetof (__builtin_offsetof) and its availability even within inline asm blocks.

While here, rename some of the functions involved to hopefully improve
clarity.
  • Loading branch information
nwf authored and marcelstoer committed Feb 25, 2024
1 parent 06c99e5 commit 7c675e8
Show file tree
Hide file tree
Showing 12 changed files with 78 additions and 61 deletions.
2 changes: 1 addition & 1 deletion app/lua53/lapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ LUA_API const char *lua_tolstring (lua_State *L, int idx, size_t *len) {
LUA_API size_t lua_rawlen (lua_State *L, int idx) {
StkId o = index2addr(L, idx);
switch (ttype(o)) {
case LUA_TSHRSTR: return getshrlen(tsvalue(o));
case LUA_TSHRSTR: return getstrshrlen(tsvalue(o));
case LUA_TLNGSTR: return tsvalue(o)->u.lnglen;
case LUA_TUSERDATA: return uvalue(o)->len;
case LUA_TTBLRAM: return luaH_getn(hvalue(o));
Expand Down
5 changes: 3 additions & 2 deletions app/lua53/ldump.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ static void DumpString (const TString *s, DumpState *D) {
if (s == NULL) {
DumpByte(LUAU_TSSTRING + 0, D);
} else {
lu_byte tt = (gettt(s) == LUA_TSHRSTR) ? LUAU_TSSTRING : LUAU_TLSTRING;
lu_byte tt = (gettt((struct GCObject *)s) == LUA_TSHRSTR) \
? LUAU_TSSTRING : LUAU_TLSTRING;
size_t l = tsslen(s);
const char *str = getstr(s);
#ifdef LUA_USE_HOST
Expand Down Expand Up @@ -314,7 +315,7 @@ static void addTS (TString *ts, DumpState *D) {
return;
if (ttisnil(luaH_getstr(D->stringIndex, ts))) {
TValue k, v, *slot;
gettt(ts)<=LUA_TSHRSTR ? D->sTScnt++ : D->lTScnt++;
gettt((struct GCObject *)ts)<=LUA_TSHRSTR ? D->sTScnt++ : D->lTScnt++;
setsvalue(L, &k, ts);
setivalue(&v, D->sTScnt + D->lTScnt);
slot = luaH_set(L, D->stringIndex, &k);
Expand Down
10 changes: 5 additions & 5 deletions app/lua53/lgc.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,23 +85,23 @@
#define WHITEBITS bit2mask(WHITE0BIT, WHITE1BIT)


#define iswhite(x) testbits(getmarked(x), WHITEBITS)
#define isblack(x) testbit(getmarked(x), BLACKBIT)
#define iswhite(x) testbits(getmarked((struct GCObject *)x), WHITEBITS)
#define isblack(x) testbit(getmarked((struct GCObject *)x), BLACKBIT)
#define isgray(x) /* neither white nor black */ \
(!testbits(getmarked(x), WHITEBITS | bitmask(BLACKBIT)))
(!testbits(getmarked((struct GCObject *)x), WHITEBITS | bitmask(BLACKBIT)))

#define tofinalize(x) testbit(getmarked(x), FINALIZEDBIT)

#define otherwhite(g) ((g)->currentwhite ^ WHITEBITS)
#define isdeadm(ow,m) (!(((m) ^ WHITEBITS) & (ow)))
#define isdead(g,v) isdeadm(otherwhite(g), getmarked(v))
#define isdead(g,v) isdeadm(otherwhite(g), getmarked((struct GCObject *)v))

#define changewhite(x) ((x)->marked ^= WHITEBITS)
#define gray2black(x) l_setbit((x)->marked, BLACKBIT)

#define luaC_white(g) cast(lu_byte, (g)->currentwhite & WHITEBITS)

#define isLFSobj(x) testbit(getmarked(x), LFSBIT)
#define isLFSobj(x) testbit(getmarked((struct GCObject *)x), LFSBIT)
#define setLFSbit(x) l_setbit((x)->marked, LFSBIT)
/*
** Does one step of collection when debt becomes positive. 'pre'/'pos'
Expand Down
46 changes: 13 additions & 33 deletions app/lua53/lobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,27 +70,6 @@
/* mark a tag as collectable */
#define ctb(t) ((t) | BIT_ISCOLLECTABLE)

/*
** Byte field access macro. On ESP targets this causes the compiler to emit
** a l32i + extui instruction pair instead of a single l8ui avoiding a call
** the S/W unaligned exception handler. This is used to force aligned access
** to commonly accessed fields in Flash-based record structures. It is not
** needed for RAM-only structures.
**
** wo is the offset of aligned word in bytes 0,4,8,..
** bo is the field within the word in bits 0..31
*/
#ifdef LUA_USE_ESP
#define GET_BYTE_FN(name,t,wo,bo) \
static inline lu_int32 get ## name(const void *o) { \
lu_int32 res; /* extract named field */ \
asm ("l32i %0, %1, " #wo "; extui %0, %0, " #bo ", 8;" : "=r"(res) : "r"(o) : );\
return res; }
#else
#define GET_BYTE_FN(name,t,wo,bo) \
static inline lu_byte get ## name(const void *o) { return (cast(const t *,o))->name; }
#endif

/*
** Common type for all collectable objects
*/
Expand All @@ -110,8 +89,8 @@ typedef struct GCObject GCObject;
struct GCObject {
CommonHeader;
};
GET_BYTE_FN(tt,GCObject,4,0)
GET_BYTE_FN(marked,GCObject,4,8)
LUA_LOAD_BYTE_FN(gettt, struct GCObject, tt);
LUA_LOAD_BYTE_FN(getmarked, struct GCObject, marked);


/*
Expand Down Expand Up @@ -258,7 +237,7 @@ typedef struct lua_TValue {

#define setsvalue(L,obj,x) \
{ TValue *io = (obj); TString *x_ = (x); \
val_(io).gc = obj2gco(x_); settt_(io, ctb(gettt(x_))); \
val_(io).gc = obj2gco(x_); settt_(io, ctb(gettt((struct GCObject *)x_))); \
checkliveness(L,io); }

#define setuvalue(L,obj,x) \
Expand All @@ -283,7 +262,7 @@ typedef struct lua_TValue {

#define sethvalue(L,obj,x) \
{ TValue *io = (obj); Table *x_ = (x); \
val_(io).gc = obj2gco(x_); settt_(io, ctb(gettt(x_))); \
val_(io).gc = obj2gco(x_); settt_(io, ctb(gettt((struct GCObject *)x_))); \
checkliveness(L,io); }

#define setdeadvalue(obj) settt_(obj, LUA_TDEADKEY)
Expand Down Expand Up @@ -344,8 +323,8 @@ typedef struct TString {
struct TString *hnext; /* linked list for hash table */
} u;
} TString;
GET_BYTE_FN(extra,TString,4,16)
GET_BYTE_FN(shrlen,TString,4,24)
LUA_LOAD_BYTE_FN(getstrextra, TString, extra)
LUA_LOAD_BYTE_FN(getstrshrlen, TString, shrlen)


/*
Expand All @@ -369,7 +348,8 @@ typedef union UTString {
#define svalue(o) getstr(tsvalue(o))

/* get string length from 'TString *s' */
#define tsslen(s) (gettt(s) == LUA_TSHRSTR ? getshrlen(s) : (s)->u.lnglen)
#define tsslen(s) \
(gettt((struct GCObject *)s) == LUA_TSHRSTR ? getstrshrlen(s) : (s)->u.lnglen)

/* get string length from 'TValue *o' */
#define vslen(o) tsslen(tsvalue(o))
Expand Down Expand Up @@ -463,9 +443,9 @@ typedef struct Proto {
GCObject *gclist;
} Proto;

GET_BYTE_FN(numparams,Proto,4,16)
GET_BYTE_FN(is_vararg,Proto,4,24)
GET_BYTE_FN(maxstacksize,Proto,8,0)
LUA_LOAD_BYTE_FN(getnumparams,Proto,numparams)
LUA_LOAD_BYTE_FN(getis_vararg,Proto,is_vararg)
LUA_LOAD_BYTE_FN(getmaxstacksize,Proto,maxstacksize)


/*
Expand Down Expand Up @@ -555,8 +535,8 @@ typedef struct Table {
GCObject *gclist;
} Table;

GET_BYTE_FN(flags,Table,4,16)
GET_BYTE_FN(lsizenode,Table,4,24)
LUA_LOAD_BYTE_FN(gettblflags,Table,flags)
LUA_LOAD_BYTE_FN(gettbllsizenode,Table,lsizenode)


typedef const struct ROTable_entry {
Expand Down
11 changes: 6 additions & 5 deletions app/lua53/lstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
*/
int luaS_eqlngstr (TString *a, TString *b) {
size_t len = a->u.lnglen;
lua_assert(gettt(a) == LUA_TLNGSTR && gettt(b) == LUA_TLNGSTR);
lua_assert(gettt((struct GCObject *)a) == LUA_TLNGSTR
&& gettt((struct GCObject *)b) == LUA_TLNGSTR);
return (a == b) || /* same instance or... */
((len == b->u.lnglen) && /* equal length and ... */
(memcmp(getstr(a), getstr(b), len) == 0)); /* equal contents */
Expand All @@ -55,8 +56,8 @@ unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) {


unsigned int luaS_hashlongstr (TString *ts) {
lua_assert(ts->tt == LUA_TLNGSTR);
if (getextra(ts) == 0) { /* no hash? */
lua_assert(gettt((struct GCObject *)ts) == LUA_TLNGSTR);
if (getstrextra(ts) == 0) { /* no hash? */
ts->hash = luaS_hash(getstr(ts), ts->u.lnglen, ts->hash);
ts->extra = 1; /* now it has its hash */
}
Expand Down Expand Up @@ -162,7 +163,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) {
TString **list = &g->strt.hash[lmod(h, g->strt.size)];
lua_assert(str != NULL); /* otherwise 'memcmp'/'memcpy' are undefined */
for (ts = *list; ts != NULL; ts = ts->u.hnext) {
if (l == getshrlen(ts) &&
if (l == getstrshrlen(ts) &&
(memcmp(str, getstr(ts), l * sizeof(char)) == 0)) {
/* found! */
if (isdead(g, ts)) /* dead (but not collected yet)? */
Expand All @@ -178,7 +179,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) {
for (ts = g->ROstrt.hash[lmod(h, g->ROstrt.size)];
ts != NULL;
ts = ts->u.hnext) {
if (l == getshrlen(ts) &&
if (l == getstrshrlen(ts) &&
memcmp(str, getstr(ts), l * sizeof(char)) == 0) {
/* found in ROstrt! */
return ts;
Expand Down
6 changes: 4 additions & 2 deletions app/lua53/lstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@
/*
** test whether a string is a reserved word
*/
#define isreserved(s) (gettt(s) == LUA_TSHRSTR && getextra(s) > 0)
#define isreserved(s) \
(gettt((struct GCObject *)s) == LUA_TSHRSTR && getstrextra(s) > 0)


/*
** equality for short strings, which are always internalized
*/
#define eqshrstr(a,b) check_exp(gettt(a) == LUA_TSHRSTR, (a) == (b))
#define eqshrstr(a,b) \
check_exp(gettt((struct GCObject *)a) == LUA_TSHRSTR, (a) == (b))


LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l, unsigned int seed);
Expand Down
14 changes: 7 additions & 7 deletions app/lua53/ltable.c
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,7 @@ const TValue *luaH_getshortstr (Table *t, TString *key) {
if (isrotable(t))
return rotable_findentry((ROTable*) t, key, NULL);
n = hashstr(t, key);
lua_assert(gettt(key) == LUA_TSHRSTR);
lua_assert(gettt((struct GCObject *)key) == LUA_TSHRSTR);
for (;;) { /* check whether 'key' is somewhere in the chain */
const TValue *k = gkey(n);
if (ttisshrstring(k) && eqshrstr(tsvalue(k), key))
Expand Down Expand Up @@ -592,7 +592,7 @@ static const TValue *getgeneric (Table *t, const TValue *key) {


const TValue *luaH_getstr (Table *t, TString *key) {
if (gettt(key) == LUA_TSHRSTR)
if (gettt((struct GCObject *)key) == LUA_TSHRSTR)
return luaH_getshortstr(t, key);
else { /* for long strings, use generic case */
TValue ko;
Expand Down Expand Up @@ -736,16 +736,16 @@ int luaH_isdummy (const Table *t) { return isdummy(t); }
*/
static const TValue* rotable_findentry(ROTable *t, TString *key, unsigned *ppos) {
const ROTable_entry *e = cast(const ROTable_entry *, t->entry);
const int tl = getlsizenode(t);
const int tl = gettbllsizenode((struct Table *)t);
const char *strkey = getstr(key);
const int hash = HASH(t, key);
KeyCache *cl = luaE_getcache(hash);
int i, j = 1, l;

if (!e || gettt(key) != LUA_TSHRSTR)
if (!e || gettt((struct GCObject *)key) != LUA_TSHRSTR)
return luaO_nilobject;

l = getshrlen(key);
l = getstrshrlen(key);
/* scan the ROTable key cache and return if hit found */
for (i = 0; i < KEYCACHE_M; i++) {
int cl_ndx = cl[i] >> NDX_SHFT;
Expand Down Expand Up @@ -804,7 +804,7 @@ static const TValue* rotable_findentry(ROTable *t, TString *key, unsigned *ppos)
static void rotable_next_helper(lua_State *L, ROTable *t, int pos,
TValue *key, TValue *val) {
const ROTable_entry *e = cast(const ROTable_entry *, t->entry);
if (pos < getlsizenode(t)) {
if (pos < gettbllsizenode((Table *)t)) {
/* Found an entry */
setsvalue(L, key, luaS_new(L, e[pos].key));
setobj2s(L, val, &e[pos].value);
Expand All @@ -817,7 +817,7 @@ static void rotable_next_helper(lua_State *L, ROTable *t, int pos,

/* next (used for iteration) */
static void rotable_next(lua_State *L, ROTable *t, TValue *key, TValue *val) {
unsigned keypos = getlsizenode(t);
unsigned keypos = gettbllsizenode((struct Table *)t);

/* Special case: if key is nil, return the first element of the rotable */
if (ttisnil(key))
Expand Down
4 changes: 2 additions & 2 deletions app/lua53/ltable.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
(gkey(cast(Node *, cast(char *, (v)) - offsetof(Node, i_val))))

/* test Table to determine if it is a RW or RO table */
#define isrotable(t) (gettt(t)==LUA_TTBLROF)
#define isrwtable(t) (gettt(t)==LUA_TTBLRAM)
#define isrotable(t) (gettt((struct GCObject *)t)==LUA_TTBLROF)
#define isrwtable(t) (gettt((struct GCObject *)t)==LUA_TTBLRAM)


LUAI_FUNC const TValue *luaH_getint (Table *t, lua_Integer key);
Expand Down
2 changes: 1 addition & 1 deletion app/lua53/ltm.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ typedef enum {


#define gfasttm(g,et,e) ((et) == NULL ? NULL : \
(getflags(et) & (1u<<(e))) ? NULL : luaT_gettm(et, e, (g)->tmname[e]))
(gettblflags(et) & (1u<<(e))) ? NULL : luaT_gettm(et, e, (g)->tmname[e]))

#define fasttm(l,et,e) gfasttm(G(l), et, e)

Expand Down
33 changes: 33 additions & 0 deletions app/lua53/luaconf.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,39 @@
#define LUA_QL(x) "'" x "'" // No longer used in lua53, but still used
#define LUA_QS LUA_QL("%s") // in some of our application modules

/* =================================================================== */

/*
@@ LUA_LOAD_BYTE_FN is used to define static inline accessor functions
** for reading byte-sized fields from object headers. This can be used to
** speed up architectures which must resort to trap-and-emulate for
** sub-word memory accesses.
*/

#ifdef LUA_USE_ESP
/*
** Byte field access macro. On ESP targets this causes the compiler to emit
** a l32i + extui instruction pair instead of a single l8ui, avoiding a call
** to the S/W unaligned exception handler. This is used to force aligned
** access to commonly accessed fields in Flash-based record structures. It
** is not needed for RAM-only structures.
**
** LUA_LOAD_BYTE_FN(fn, type, field) defines
**   static inline <integer> fn(const type *o)
** which returns the value of the one-byte member 'field' of '*o'.
**   fn    - name of the accessor function to define
**   type  - structure type containing the field
**   field - name of the byte-sized member to read
**
** The aligned word holding the field is loaded from byte offset
** (offsetof/4)*4 and the byte is extracted starting at bit
** (offsetof%4)*8, i.e. the extraction assumes little-endian byte order
** within the word.
**
** The trailing "m"(*o) input is not referenced by the template; it only
** tells the compiler that the asm reads the object, so that the load is
** neither merged with an earlier identical load nor moved across a store
** to the same object.
*/
#define LUA_LOAD_BYTE_FN(fn, type, field) \
static inline lu_int32 fn(const type *o) { \
lu_int32 res; /* extract named field */ \
asm ("l32i %0, %1, %2;" \
"extui %0, %0, %3, 8;" \
: "=r"(res) \
: "r"(o) \
, "i"((offsetof(type, field)/4)*4) \
, "i"((offsetof(type, field)%4)*8) \
, "m"(*o) /* asm reads *o */ \
: );\
return res; }
#endif

/*
** Generic fallback: a plain member read through an inline function with
** the same name and parameter as the target-specific variant above.
*/
#if !defined(LUA_LOAD_BYTE_FN)
#define LUA_LOAD_BYTE_FN(fn, type, field) \
static inline lu_byte fn(const type *o) { return o->field; }
#endif

/*
** {==================================================================
** Other NodeMCU configuration.
Expand Down
4 changes: 2 additions & 2 deletions app/lua53/lvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2) {
#define tostring(L,o) \
(ttisstring(o) || (cvt2str(o) && (luaO_tostring(L, o), 1)))

#define isemptystr(o) (ttisshrstring(o) && getshrlen(tsvalue(o)) == 0)
#define isemptystr(o) (ttisshrstring(o) && getstrshrlen(tsvalue(o)) == 0)

/* copy strings in stack from top - n up to top - 1 to buffer */
static void copy2buff (StkId top, int n, char *buff) {
Expand Down Expand Up @@ -516,7 +516,7 @@ void luaV_objlen (lua_State *L, StkId ra, const TValue *rb) {
return;
}
case LUA_TSHRSTR: {
setivalue(ra, getshrlen(tsvalue(rb)));
setivalue(ra, getstrshrlen(tsvalue(rb)));
return;
}
case LUA_TLNGSTR: {
Expand Down
2 changes: 1 addition & 1 deletion docs/lua53.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ Lua53 also reimplements the Lua51 LCD (Lua Compact Debug) patch. This replaces t

By default the GCC compiler emits a `l8ui` instruction to access byte fields on the ESP8266 and ESP32 Xtensa processors. This instruction will generate an unaligned fetch exception when this byte field is in Flash memory (as will accessing short fields). These exceptions are handled by emulating the instruction in software using an unaligned access handler; this allows execution to continue albeit with the runtime cost of handling the exception in software. We wish to avoid the performance hit of executing this handler for such exceptions.

`lobject.h` now defines a `GET_BYTE_FN(name,t,wo,bo)` macro. In the case of host targets this macro generates the normal field access, but in the case of Xtensa targets uses of this macro define an `static inline` access function for each field. These functions at the default `-O2` optimisation level cause the code generator to emit a pair of `l32i.n` + `extui` instructions replacing the single `l8ui` instruction. This has the cost of an extra instruction execution for accessing RAM data, but also removes the 200+ clock overhead of the software exception handler in the case of flash memory accesses.
`luaconf.h` now defines a `LUA_LOAD_BYTE_FN(fn,type,field)` macro. Each use of this macro defines a `static inline` access function for one field. In the case of host targets this function performs the normal field access, but in the case of Xtensa targets it is implemented with inline assembly. These functions at the default `-O2` optimisation level cause the code generator to emit a pair of `l32i.n` + `extui` instructions replacing the single `l8ui` instruction. This has the cost of an extra instruction execution for accessing RAM data, but also removes the 200+ clock overhead of the software exception handler in the case of flash memory accesses.

There are 9 byte fields in the `GCObject`, `TString`, `Proto`, `ROTable` structures that can either be statically compiled as `const struct` into library code space or generated by the Lua cross compiler and loaded into the LFS region; the `LUA_LOAD_BYTE_FN` macro is used to create inline access functions for these fields, and read references of the form `(o)->tt` (for example) have been recoded using the access function form `gettt(o)`. There are 44 such changed access references in the source which together represent perhaps 99% of potential sources of this software exception within the Lua VM.

Expand Down

0 comments on commit 7c675e8

Please sign in to comment.