Skip to content

Commit

Permalink
Statically allocate JIT temp buffers
Browse files Browse the repository at this point in the history
If we only need a few ~64K element arrays then just allocate those up
front and don't worry about dynamically growing things. This is
simpler, requires less code, and has no obvious practical downside.
  • Loading branch information
lukego committed Nov 28, 2017
1 parent 39680ca commit 16121f2
Show file tree
Hide file tree
Showing 8 changed files with 17 additions and 108 deletions.
53 changes: 2 additions & 51 deletions src/lj_ir.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,49 +65,6 @@ IRCALLDEF(IRCALLCI)

/* -- IR emitter ---------------------------------------------------------- */

/* Enlarge the IR buffer at its upper end.
** An existing buffer is doubled via lj_mem_realloc(). If no buffer exists
** yet, one of LJ_MIN_IRSZ instructions is allocated and the biased limits
** are placed so that a quarter of it lies below REF_BASE.
** Note: J->irbuf and J->cur.ir are recomputed from the (possibly moved)
** allocation.
*/
void lj_ir_growtop(jit_State *J)
{
  MSize cursz = J->irtoplim - J->irbotlim;
  IRIns *mem;
  if (cursz == 0) {
    /* First allocation: establish both limits around REF_BASE. */
    mem = (IRIns *)lj_mem_realloc(J->L, NULL, 0, LJ_MIN_IRSZ*sizeof(IRIns));
    J->irbotlim = REF_BASE - LJ_MIN_IRSZ/4;
    J->irtoplim = J->irbotlim + LJ_MIN_IRSZ;
  } else {
    /* Existing buffer: double it, keeping the bottom limit fixed. */
    MSize newsz = 2*cursz;
    mem = (IRIns *)lj_mem_realloc(J->L, J->irbuf + J->irbotlim,
                                  cursz*sizeof(IRIns), newsz*sizeof(IRIns));
    J->irtoplim = J->irbotlim + newsz;
  }
  /* Re-bias the buffer pointer so that irbuf[irbotlim] is the first slot. */
  J->cur.ir = J->irbuf = mem - J->irbotlim;
}

/* Grow IR buffer at the bottom or shift it up.
** Creates free slots below the current contents, either by sliding the
** contents upward inside the existing allocation or by copying them into a
** new allocation of twice the size.
** Note: J->irbuf and J->cur.ir are recomputed in both cases.
*/
static void lj_ir_growbot(jit_State *J)
{
/* Un-biased start of the allocation and its size in instructions. */
IRIns *baseir = J->irbuf + J->irbotlim;
MSize szins = J->irtoplim - J->irbotlim;
/* A buffer must already exist and the constant cursor must be at the
** bottom limit or exactly one slot above it.
*/
lua_assert(szins != 0);
lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim);
if (J->cur.nins + (szins >> 1) < J->irtoplim) {
/* More than half of the buffer is free on top: shift up by a quarter. */
MSize ofs = szins >> 2;
/* Source and destination overlap inside the same block, hence memmove. */
memmove(baseir + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns));
/* Lower both biased limits by the shift so that existing refs index the
** moved slots once irbuf is re-biased below.
*/
J->irbotlim -= ofs;
J->irtoplim -= ofs;
J->cur.ir = J->irbuf = baseir - J->irbotlim;
} else {
/* Double the buffer size, but split the growth amongst top/bottom. */
IRIns *newbase = lj_mem_newt(J->L, 2*szins*sizeof(IRIns), IRIns);
MSize ofs = szins >= 256 ? 128 : (szins >> 1); /* Limit bottom growth. */
/* Copy everything from the old bottom up to nins, leaving ofs free slots
** below it in the new block, then release the old block.
*/
memcpy(newbase + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns));
lj_mem_free(G(J->L), baseir, szins*sizeof(IRIns));
J->irbotlim -= ofs;
J->irtoplim = J->irbotlim + 2*szins;
J->cur.ir = J->irbuf = newbase - J->irbotlim;
}
}

/* Emit IR without any optimizations. */
TRef lj_ir_emit(jit_State *J)
{
Expand Down Expand Up @@ -161,25 +118,19 @@ LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
** comparisons. The same constant must get the same reference.
*/

/* Reserve the next IR constant slot and return its reference.
** Constants are allocated downwards: the returned ref is one below the
** previous J->cur.nk. If the cursor has reached the lower buffer limit,
** the buffer is grown at the bottom first.
** Note: growing may invalidate all IRIns *!
*/
static LJ_AINLINE IRRef ir_nextk(jit_State *J)
{
  IRRef kref = J->cur.nk;
  if (LJ_UNLIKELY(kref <= J->irbotlim))
    lj_ir_growbot(J);
  kref--;
  J->cur.nk = kref;
  return kref;
}

/* Reserve two consecutive IR constant slots for a 64 bit constant and
** return the reference of the lower one.
** The buffer is grown at the bottom first if the new position would fall
** below the lower buffer limit.
** Note: growing may invalidate all IRIns *!
*/
static LJ_AINLINE IRRef ir_nextk64(jit_State *J)
{
  IRRef kref = J->cur.nk - 2;
  lua_assert(J->state != LJ_TRACE_ASM);
  if (LJ_UNLIKELY(kref < J->irbotlim)) {
    lj_ir_growbot(J);
  }
  J->cur.nk = kref;
  return kref;
}
Expand Down
5 changes: 1 addition & 4 deletions src/lj_iropt.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,10 @@ static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b)
/* Convenience wrapper around lj_ir_set_(): casts ot to uint16_t and both
** operands to IRRef1 before forwarding them.
*/
#define lj_ir_set(J, ot, a, b) \
lj_ir_set_(J, (uint16_t)(ot), (IRRef1)(a), (IRRef1)(b))

/* Reserve the next IR instruction slot and return its reference.
** Instructions are allocated upwards: J->cur.nins is advanced by one. If
** the cursor has reached the upper buffer limit, the buffer is grown at
** the top first.
** Note: growing may invalidate all IRIns*!
*/
static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
{
  IRRef insref = J->cur.nins;
  if (LJ_UNLIKELY(insref >= J->irtoplim)) {
    lj_ir_growtop(J);
  }
  J->cur.nins = insref + 1;
  return insref;
}
Expand Down
2 changes: 0 additions & 2 deletions src/lj_jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,6 @@ typedef struct jit_State {
uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */

IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
IRRef irtoplim; /* Upper limit of instruction buffer (biased). */
IRRef irbotlim; /* Lower limit of instruction buffer (biased). */
IRRef loopref; /* Last loop reference or ref of final LOOP (or 0). */

MSize sizesnap; /* Size of temp. snapshot buffer. */
Expand Down
8 changes: 0 additions & 8 deletions src/lj_opt_loop.c
Original file line number Diff line number Diff line change
Expand Up @@ -283,15 +283,7 @@ static void loop_unroll(LoopState *lps)
/* LOOP separates the pre-roll from the loop body. */
emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0);

/* Grow snapshot buffer and map for copy-substituted snapshots.
** Need up to twice the number of snapshots minus #0 and loop snapshot.
** Need up to twice the number of entries plus fallback substitutions
** from the loop snapshot entries for each new snapshot.
** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap!
*/
onsnap = J->cur.nsnap;
lj_snap_grow_buf(J, 2*onsnap-2);
lj_snap_grow_map(J, J->cur.nsnapmap*2+(onsnap-2)*J->cur.snap[onsnap-1].nent);

/* The loop snapshot is used for fallback substitutions. */
loopsnap = &J->cur.snap[onsnap-1];
Expand Down
27 changes: 0 additions & 27 deletions src/lj_snap.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,31 +29,6 @@
/* Emit raw IR without passing through optimizations.
** Expands to a comma expression: lj_ir_set() is evaluated first, then
** lj_ir_emit(), whose result is the value of the whole expression.
** Relies on a variable named J being in scope at the point of use.
*/
#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))

/* -- Snapshot buffer allocation ------------------------------------------ */

/* Out-of-line slow path for growing the temporary snapshot buffer.
** Reports LJ_TRERR_SNAPOV through lj_trace_err() when the requested count
** exceeds the maxsnap JIT parameter. Otherwise the buffer is grown with
** lj_mem_growvec(), using maxsnap as the limit, and the current trace's
** snapshot pointer is refreshed.
*/
void lj_snap_grow_buf_(jit_State *J, MSize need)
{
  MSize limit = (MSize)J->param[JIT_P_maxsnap];
  if (limit < need) {
    lj_trace_err(J, LJ_TRERR_SNAPOV);
  }
  lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, limit, SnapShot);
  J->cur.snap = J->snapbuf;  /* The buffer may have moved. */
}

/* Out-of-line slow path for growing the temporary snapshot map buffer.
** The new size is at least double the old size; otherwise it is the
** requested size, raised to a minimum of 64 entries. The current trace's
** snapshot map pointer is refreshed after reallocation.
*/
void lj_snap_grow_map_(jit_State *J, MSize need)
{
  MSize oldsz = J->sizesnapmap;
  MSize newsz = need;
  SnapEntry *map;
  if (newsz < 2*oldsz) {
    newsz = 2*oldsz;
  } else if (newsz < 64) {
    newsz = 64;
  }
  map = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
                                    oldsz*sizeof(SnapEntry),
                                    newsz*sizeof(SnapEntry));
  /* Publish the new buffer only after the reallocation has returned. */
  J->snapmapbuf = map;
  J->cur.snapmap = map;
  J->sizesnapmap = newsz;
}

/* -- Snapshot generation ------------------------------------------------- */

/* Add all modified slots to the snapshot. */
Expand Down Expand Up @@ -130,7 +105,6 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
MSize nent;
SnapEntry *p;
/* Conservative estimate. */
lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
p = &J->cur.snapmap[nsnapmap];
nent = snapshot_slots(J, p, nslots);
snap->nent = (uint8_t)nent;
Expand All @@ -157,7 +131,6 @@ void lj_snap_add(jit_State *J)
nsnapmap = J->cur.snap[--nsnap].mapofs;
} else {
nomerge:
lj_snap_grow_buf(J, nsnap+1);
J->cur.nsnap = (uint16_t)(nsnap+1);
}
J->mergesnap = 0;
Expand Down
13 changes: 0 additions & 13 deletions src/lj_snap.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,5 @@ LJ_FUNC void lj_snap_shrink(jit_State *J);
LJ_FUNC IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir);
LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T);
LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr);
LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need);
LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need);

/* Inline fast path: call the out-of-line grower only when the snapshot
** buffer size is smaller than the requested count.
*/
static LJ_AINLINE void lj_snap_grow_buf(jit_State *J, MSize need)
{
  if (LJ_UNLIKELY(J->sizesnap < need)) {
    lj_snap_grow_buf_(J, need);
  }
}

/* Inline fast path: call the out-of-line grower only when the snapshot map
** buffer size is smaller than the requested count.
*/
static LJ_AINLINE void lj_snap_grow_map(jit_State *J, MSize need)
{
  if (LJ_UNLIKELY(J->sizesnapmap < need)) {
    lj_snap_grow_map_(J, need);
  }
}


#endif
14 changes: 14 additions & 0 deletions src/lj_state.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud)
static void close_state(lua_State *L)
{
global_State *g = G(L);
jit_State *J = L2J(L);
lj_func_closeuv(L, tvref(L->stack));
lj_gc_freeall(g);
lua_assert(gcref(g->gc.root) == obj2gco(L));
Expand All @@ -167,6 +168,9 @@ static void close_state(lua_State *L)
lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
lj_buf_free(g, &g->tmpbuf);
lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
lj_mem_free(g, J->snapmapbuf, J->sizesnapmap);
lj_mem_free(g, J->snapbuf, J->sizesnap);
lj_mem_free(g, J->irbuf-REF_BIAS, 65536*sizeof(IRIns));
lua_assert(g->gc.total == sizeof(GG_State));
#ifndef LUAJIT_USE_SYSMALLOC
if (g->allocf == lj_alloc_f)
Expand All @@ -181,6 +185,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State));
lua_State *L = &GG->L;
global_State *g = &GG->g;
jit_State *J = &GG->J;
if (GG == NULL || !checkptrGC(GG)) return NULL;
memset(GG, 0, sizeof(GG_State));
L->gct = ~LJ_TTHREAD;
Expand All @@ -206,6 +211,15 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
g->gc.total = sizeof(GG_State);
g->gc.pause = LUAI_GCPAUSE;
g->gc.stepmul = LUAI_GCMUL;
/* Statically allocate generous JIT scratch buffers. */
J->sizesnap = sizeof(SnapShot)*65536;
J->sizesnapmap = sizeof(SnapEntry)*65536;
J->snapbuf = (SnapShot *)lj_mem_new(L, J->sizesnap);
J->snapmapbuf = (SnapEntry *)lj_mem_new(L, J->sizesnapmap);
IRIns *irbufmem = (IRIns *)lj_mem_new(L, sizeof(IRIns)*65536);
if (irbufmem == NULL || J->snapbuf == NULL || J->snapmapbuf == NULL)
return NULL;
J->irbuf = irbufmem + REF_BIAS;
lj_dispatch_init((GG_State *)L);
L->status = LUA_ERRERR+1; /* Avoid touching the stack upon memory error. */
if (lj_vm_cpcall(L, NULL, NULL, cpluaopen) != 0) {
Expand Down
3 changes: 0 additions & 3 deletions src/lj_trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -292,9 +292,6 @@ void lj_trace_freestate(global_State *g)
}
#endif
lj_mcode_free(J);
lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry);
lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot);
lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns);
lj_mem_freevec(g, J->trace, J->sizetrace, GCRef);
}

Expand Down

0 comments on commit 16121f2

Please sign in to comment.