Skip to content

Commit

Permalink
VMProfile 4.0: File format revision
Browse files Browse the repository at this point in the history
Now data is stored in a 2D matrix indexed by VM state and trace number
(0 for "other.") This holds for all VM states including interpreting,
recording, etc.

This makes the format simpler and more regular. Previously the samples
were stored in two related sections, one global and one per-trace. Now
everything is stored per-trace, with trace 0 as a catch-all. (To compute
the total "global" value for a VM state, sum its counters over all traces.)
  • Loading branch information
lukego committed Nov 23, 2017
1 parent 5cac382 commit c1659d9
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 55 deletions.
66 changes: 30 additions & 36 deletions src/lj_vmprofile.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ int vmprofile_get_profile_size() {
/* Install the counter memory that the profiler writes its samples into.
**
** counters: caller-provided memory that is used as a VMProfile. Its header
**           is stamped with the magic number and the file format version
**           (4.0). Passing NULL uninstalls the profile: the signal handler
**           skips sampling while no profile is set.
**           NOTE(review): presumably the buffer must be at least
**           vmprofile_get_profile_size() bytes -- confirm against callers.
*/
void vmprofile_set_profile(void *counters) {
  profile = (VMProfile*)counters;
  if (profile == NULL) {
    return;  /* Nothing to stamp; profiling is now disabled. */
  }
  profile->magic = 0x1d50f007;
  profile->major = 4;
  profile->minor = 0;
}

Expand All @@ -56,48 +56,42 @@ void vmprofile_set_profile(void *counters) {
/* Profiling signal handler: attribute one sample to a cell of the
** profile->count[trace][vmstate] matrix and increment it.
**
** sig, si: not used by this handler.
** data:    signal context, read as a ucontext_t to fetch the interrupted
**          instruction pointer (REG_RIP).
**
** Does nothing when no profile is installed (profile == NULL).
*/
static void vmprofile_signal(int sig, siginfo_t *si, void *data)
{
  if (profile != NULL) {
    int vmstate, trace;  /* sample matrix indices */
    lua_State *L = gco2th(gcref(state.g->cur_L));
    /*
     * The basic job of this function is to select the right indices
     * into the profile counter matrix. That requires deciding which
     * logical state the VM is in and which trace the sample should be
     * attributed to. Heuristics are needed to pick appropriate values.
     */
    if (state.g->vmstate > 0) {  /* Running JIT mcode. */
      /* A positive vmstate is the number of the executing trace. */
      GCtrace *T = traceref(L2J(L), (TraceNo)state.g->vmstate);
      intptr_t ip = (intptr_t)((ucontext_t*)data)->uc_mcontext.gregs[REG_RIP];
      ptrdiff_t mcposition = ip - (intptr_t)T->mcode;
      if ((mcposition < 0) || (mcposition >= T->szmcode)) {
        vmstate = LJ_VMST_FFI;   /* IP is outside the trace mcode. */
      } else if ((T->mcloop != 0) && (mcposition >= T->mcloop)) {
        vmstate = LJ_VMST_LOOP;  /* IP is inside the mcode loop. */
      } else {
        vmstate = LJ_VMST_HEAD;  /* IP is inside mcode but not loop. */
      }
      trace = state.g->vmstate;
    } else {  /* Running VM code (not JIT mcode.) */
      if (~state.g->vmstate == LJ_VMST_GC && state.g->gcvmstate > 0) {
        /* Special case: GC invoked from JIT mcode. */
        vmstate = LJ_VMST_JGC;
        trace = state.g->gcvmstate;
      } else {
        /* General case: count towards most recently exited trace. */
        vmstate = ~state.g->vmstate;
        trace = state.g->lasttrace;
      }
    }
    /*
     * Handle overflow from individual trace counters. The matrix has rows
     * 0..LJ_VMPROFILE_TRACE_MAX, so any trace number outside that range is
     * folded into the shared catch-all row 0 instead of indexing past the
     * end of the array.
     */
    if (trace < 0 || trace > LJ_VMPROFILE_TRACE_MAX) {
      trace = 0;
    }
    /* Phew! We have calculated the indices and now we can bump the counter. */
    /* Columns are 0..LJ_VMST__MAX-1, so LJ_VMST__MAX itself is out of range. */
    assert(vmstate >= 0 && vmstate < LJ_VMST__MAX);
    profile->count[trace][vmstate]++;
  }
}

Expand Down
29 changes: 10 additions & 19 deletions src/lj_vmprofile.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,31 +10,22 @@
/* Type of a single sample counter. */
typedef uint64_t VMProfileCount;

/* Maximum trace number for distinct counter buckets. Traces with
   higher numbers will be counted together in the shared bucket zero. */
#define LJ_VMPROFILE_TRACE_MAX 4096

/* Traces have separate counters for different machine code regions. */
/* One record of sample counters for a single trace, one field per state.
** NOTE(review): this looks like the per-trace record of the pre-4.0 file
** format; format 4.0 keeps its samples in the VMProfile count[trace][state]
** matrix instead -- confirm whether anything still uses this type. */
typedef struct VMProfileTraceCount {
VMProfileCount head; /* Head of the trace (non-looping part) */
VMProfileCount loop; /* Loop of the trace */
VMProfileCount ffi; /* Outside the trace mcode (assumed FFI) */
VMProfileCount gc; /* Garbage collection from this trace. */
VMProfileCount interp; /* Interpreter due to exit from this trace. */
} VMProfileTraceCount;

/* Complete set of counters for VM and traces (file format 4.0). */
typedef struct VMProfile {
  uint32_t magic;         /* 0x1d50f007 */
  uint16_t major, minor;  /* 4, 0 */
  /* Profile counters are stored in a 2D matrix of count[trace][state].
  **
  ** The profiler attempts to attribute each sample to one vmstate and
  ** one trace. The vmstate is an LJ_VMST_* constant. The trace is
  ** either 1..LJ_VMPROFILE_TRACE_MAX (counter for one individual trace)
  ** or 0 (shared counter for all higher-numbered traces and for samples
  ** that can't be attributed to a specific trace at all.)
  **
  ** There is no separate "global" section: the total for a vmstate is
  ** the sum of count[t][vmstate] over all trace rows t.
  */
  VMProfileCount count[LJ_VMPROFILE_TRACE_MAX+1][LJ_VMST__MAX];
} VMProfile;

/* Functions that should be accessed via FFI. */
Expand Down

0 comments on commit c1659d9

Please sign in to comment.