diff --git a/syscall/linux/c.lua b/syscall/linux/c.lua
index 35e11f299e..87575c3e41 100644
--- a/syscall/linux/c.lua
+++ b/syscall/linux/c.lua
@@ -696,6 +696,12 @@ if sys.bpf then
     return syscall(sys.bpf, int(cmd), void(attr), u64(ffi.sizeof('union bpf_attr')))
   end
 end
+-- perf_event_open(2) is issued through the generic syscall() helper
+if sys.perf_event_open then
+  function C.perf_event_open(attr, pid, cpu, group_fd, flags)
+    return syscall(sys.perf_event_open, void(attr), int(pid), int(cpu), int(group_fd), ulong(flags))
+  end
+end
 
 -- socketcalls
 if not sys.socketcall then
diff --git a/syscall/linux/constants.lua b/syscall/linux/constants.lua
index 95b30e023d..c1776f2b5d 100644
--- a/syscall/linux/constants.lua
+++ b/syscall/linux/constants.lua
@@ -2109,6 +2109,155 @@ c.BPF_PROG = strflag {
   SCHED_ACT = 4,
 }
 
+-- Linux performance monitoring
+-- perf_event_attr.type
+c.PERF_TYPE = strflag {
+  HARDWARE = 0,
+  SOFTWARE = 1,
+  TRACEPOINT = 2,
+  HW_CACHE = 3,
+  RAW = 4,
+  BREAKPOINT = 5,
+}
+
+-- perf_event_attr.config (event ids, interpreted according to perf_event_attr.type)
+c.PERF_COUNT = strflag {
+  -- Generalized performance event event_id types
+  HW_CPU_CYCLES = 0,
+  HW_INSTRUCTIONS = 1,
+  HW_CACHE_REFERENCES = 2,
+  HW_CACHE_MISSES = 3,
+  HW_BRANCH_INSTRUCTIONS = 4,
+  HW_BRANCH_MISSES = 5,
+  HW_BUS_CYCLES = 6,
+  HW_STALLED_CYCLES_FRONTEND = 7,
+  HW_STALLED_CYCLES_BACKEND = 8,
+  HW_REF_CPU_CYCLES = 9,
+  -- Generalized hardware cache events
+  HW_CACHE_L1D = 0,
+  HW_CACHE_L1I = 1,
+  HW_CACHE_LL = 2,
+  HW_CACHE_DTLB = 3,
+  HW_CACHE_ITLB = 4,
+  HW_CACHE_BPU = 5,
+  HW_CACHE_NODE = 6,
+  HW_CACHE_OP_READ = 0,
+  HW_CACHE_OP_WRITE = 1,
+  HW_CACHE_OP_PREFETCH = 2,
+  HW_CACHE_RESULT_ACCESS = 0,
+  HW_CACHE_RESULT_MISS = 1,
+  -- Special "software" events provided by the kernel
+  SW_CPU_CLOCK = 0,
+  SW_TASK_CLOCK = 1,
+  SW_PAGE_FAULTS = 2,
+  SW_CONTEXT_SWITCHES = 3,
+  SW_CPU_MIGRATIONS = 4,
+  SW_PAGE_FAULTS_MIN = 5,
+  SW_PAGE_FAULTS_MAJ = 6,
+  SW_ALIGNMENT_FAULTS = 7,
+  SW_EMULATION_FAULTS = 8,
+  SW_DUMMY = 9,
+  SW_BPF_OUTPUT = 10,
+}
+
+-- Bits that can be set in perf_event_attr.sample_type to request information
+c.PERF_SAMPLE = multiflags {
+  IP = bit.lshift(1, 0),
+  TID = bit.lshift(1, 1),
+  TIME = bit.lshift(1, 2),
+  ADDR = bit.lshift(1, 3),
+  READ = bit.lshift(1, 4),
+  CALLCHAIN = bit.lshift(1, 5),
+  ID = bit.lshift(1, 6),
+  CPU = bit.lshift(1, 7),
+  PERIOD = bit.lshift(1, 8),
+  STREAM_ID = bit.lshift(1, 9),
+  RAW = bit.lshift(1, 10),
+  BRANCH_STACK = bit.lshift(1, 11),
+  REGS_USER = bit.lshift(1, 12),
+  STACK_USER = bit.lshift(1, 13),
+  WEIGHT = bit.lshift(1, 14),
+  DATA_SRC = bit.lshift(1, 15),
+  IDENTIFIER = bit.lshift(1, 16),
+  TRANSACTION = bit.lshift(1, 17),
+  REGS_INTR = bit.lshift(1, 18),
+}
+
+-- values to program into perf_event_attr.branch_sample_type when PERF_SAMPLE.BRANCH_STACK is set
+c.PERF_SAMPLE_BRANCH = multiflags {
+  USER_SHIFT = 0,
+  KERNEL_SHIFT = 1,
+  HV_SHIFT = 2,
+  ANY_SHIFT = 3,
+  ANY_CALL_SHIFT = 4,
+  ANY_RETURN_SHIFT = 5,
+  IND_CALL_SHIFT = 6,
+  ABORT_TX_SHIFT = 7,
+  IN_TX_SHIFT = 8,
+  NO_TX_SHIFT = 9,
+  COND_SHIFT = 10,
+  CALL_STACK_SHIFT = 11,
+  IND_JUMP_SHIFT = 12,
+  CALL_SHIFT = 13,
+  NO_FLAGS_SHIFT = 14,
+  NO_CYCLES_SHIFT = 15,
+}
+-- derive each mask from its *_SHIFT bit position above
+c.PERF_SAMPLE_BRANCH.USER = bit.lshift(1, c.PERF_SAMPLE_BRANCH.USER_SHIFT)
+c.PERF_SAMPLE_BRANCH.KERNEL = bit.lshift(1, c.PERF_SAMPLE_BRANCH.KERNEL_SHIFT)
+c.PERF_SAMPLE_BRANCH.HV = bit.lshift(1, c.PERF_SAMPLE_BRANCH.HV_SHIFT)
+c.PERF_SAMPLE_BRANCH.ANY = bit.lshift(1, c.PERF_SAMPLE_BRANCH.ANY_SHIFT)
+c.PERF_SAMPLE_BRANCH.ANY_CALL = bit.lshift(1, c.PERF_SAMPLE_BRANCH.ANY_CALL_SHIFT)
+c.PERF_SAMPLE_BRANCH.ANY_RETURN = bit.lshift(1, c.PERF_SAMPLE_BRANCH.ANY_RETURN_SHIFT)
+c.PERF_SAMPLE_BRANCH.IND_CALL = bit.lshift(1, c.PERF_SAMPLE_BRANCH.IND_CALL_SHIFT)
+c.PERF_SAMPLE_BRANCH.ABORT_TX = bit.lshift(1, c.PERF_SAMPLE_BRANCH.ABORT_TX_SHIFT)
+c.PERF_SAMPLE_BRANCH.IN_TX = bit.lshift(1, c.PERF_SAMPLE_BRANCH.IN_TX_SHIFT)
+c.PERF_SAMPLE_BRANCH.NO_TX = bit.lshift(1, c.PERF_SAMPLE_BRANCH.NO_TX_SHIFT)
+c.PERF_SAMPLE_BRANCH.COND = bit.lshift(1, c.PERF_SAMPLE_BRANCH.COND_SHIFT)
+c.PERF_SAMPLE_BRANCH.CALL_STACK = bit.lshift(1, c.PERF_SAMPLE_BRANCH.CALL_STACK_SHIFT)
+c.PERF_SAMPLE_BRANCH.IND_JUMP = bit.lshift(1, c.PERF_SAMPLE_BRANCH.IND_JUMP_SHIFT)
+c.PERF_SAMPLE_BRANCH.CALL = bit.lshift(1, c.PERF_SAMPLE_BRANCH.CALL_SHIFT)
+c.PERF_SAMPLE_BRANCH.NO_FLAGS = bit.lshift(1, c.PERF_SAMPLE_BRANCH.NO_FLAGS_SHIFT)
+c.PERF_SAMPLE_BRANCH.NO_CYCLES = bit.lshift(1, c.PERF_SAMPLE_BRANCH.NO_CYCLES_SHIFT)
+
+-- Flags for perf_event_attr.read_format
+c.PERF_READ_FORMAT = multiflags {
+  TOTAL_TIME_ENABLED = bit.lshift(1, 0),
+  TOTAL_TIME_RUNNING = bit.lshift(1, 1),
+  ID = bit.lshift(1, 2),
+  GROUP = bit.lshift(1, 3),
+}
+
+-- Flags for perf_event_open
+c.PERF_FLAG = multiflags {
+  FD_NO_GROUP = bit.lshift(1, 0),
+  FD_OUTPUT = bit.lshift(1, 1),
+  PID_CGROUP = bit.lshift(1, 2),
+  FD_CLOEXEC = bit.lshift(1, 3),
+}
+
+
+-- Record types found in perf_event_header.type.
+-- If perf_event_attr.sample_id_all is set then all event types will have the
+-- sample_type selected identity fields (TID, TIME, ID, STREAM_ID, CPU, IDENTIFIER)
+c.PERF_RECORD = strflag {
+  MMAP = 1,
+  LOST = 2,
+  COMM = 3,
+  EXIT = 4,
+  THROTTLE = 5,
+  UNTHROTTLE = 6,
+  FORK = 7,
+  READ = 8,
+  SAMPLE = 9,
+  MMAP2 = 10,
+  AUX = 11,
+  ITRACE_START = 12,
+  LOST_SAMPLES = 13,
+  SWITCH = 14,
+  SWITCH_CPU_WIDE = 15,
+}
+
 -- termios - c_cc characters
 c.CC = strflag(arch.CC or {
   VINTR = 0,
diff --git a/syscall/linux/ffi.lua b/syscall/linux/ffi.lua
index d5851ea5dc..1ebb94e5d4 100644
--- a/syscall/linux/ffi.lua
+++ b/syscall/linux/ffi.lua
@@ -540,6 +540,101 @@ union bpf_attr {
     uint32_t bpf_fd;
   };
 } __attribute__((aligned(8)));
+struct perf_event_attr {
+  uint32_t pe_type;
+  uint32_t size;
+  uint64_t pe_config;
+  union {
+    uint64_t sample_period;
+    uint64_t sample_freq;
+  };
+  uint64_t pe_sample_type;
+  uint64_t read_format;
+  uint32_t disabled:1,
+           inherit:1,
+           pinned:1,
+           exclusive:1,
+           exclude_user:1,
+           exclude_kernel:1,
+           exclude_hv:1,
+           exclude_idle:1,
+           mmap:1,
+           comm:1,
+           freq:1,
+           inherit_stat:1,
+           enable_on_exec:1,
+           task:1,
+           watermark:1,
+           precise_ip:2,
+           mmap_data:1,
+           sample_id_all:1,
+           exclude_host:1,
+           exclude_guest:1,
+           exclude_callchain_kernel:1,
+           exclude_callchain_user:1,
+           mmap2:1,
+           comm_exec:1,
+           use_clockid:1,
+           __reserved_1a:6;
+  uint32_t __reserved_1b;
+  union {
+    uint32_t wakeup_events;
+    uint32_t wakeup_watermark;
+  };
+  uint32_t bp_type;
+  union {
+    uint64_t bp_addr;
+    uint64_t config1;
+  };
+  union {
+    uint64_t bp_len;
+    uint64_t config2;
+  };
+  uint64_t branch_sample_type;
+  uint64_t sample_regs_user;
+  uint32_t sample_stack_user;
+  int32_t clockid;
+  uint64_t sample_regs_intr;
+  uint32_t aux_watermark;
+  uint32_t __reserved_2;
+};
+struct perf_event_mmap_page {
+  uint32_t version;
+  uint32_t compat_version;
+  uint32_t lock;
+  uint32_t index;
+  int64_t offset;
+  uint64_t time_enabled;
+  uint64_t time_running;
+  union {
+    uint64_t capabilities;
+    struct {
+      uint32_t cap_bit0 : 1,
+               cap_bit0_is_deprecated : 1,
+               cap_user_rdpmc : 1,
+               cap_user_time : 1,
+               cap_user_time_zero : 1;
+    };
+  };
+  uint16_t pmc_width;
+  uint16_t time_shift;
+  uint32_t time_mult;
+  uint64_t time_offset;
+  uint64_t __reserved[120];
+  volatile uint64_t data_head;
+  volatile uint64_t data_tail;
+  volatile uint64_t data_offset;
+  volatile uint64_t data_size;
+  uint64_t aux_head;
+  uint64_t aux_tail;
+  uint64_t aux_offset;
+  uint64_t aux_size;
+};
+struct perf_event_header {
+  uint32_t type;
+  uint16_t misc;
+  uint16_t size;
+};
 struct mq_attr {
   long mq_flags, mq_maxmsg, mq_msgsize, mq_curmsgs, __unused[4];
 };
diff --git a/syscall/linux/ioctl.lua b/syscall/linux/ioctl.lua
index ec05d5e5ca..adf9de2744 100644
--- a/syscall/linux/ioctl.lua
+++ b/syscall/linux/ioctl.lua
@@ -266,6 +266,16 @@ local ioctl = strflag {
 -- from linux/vfio.h type is ';' base is 100
   VFIO_GET_API_VERSION = vfio('NONE', 0),
   VFIO_CHECK_EXTENSION = vfio('WRITE', 1, "uint32"),
+-- from linux/perf_event.h type is '$'
+  PERF_EVENT_IOC_ENABLE     = _IO('$', 0),
+  PERF_EVENT_IOC_DISABLE    = _IO('$', 1),
+  PERF_EVENT_IOC_REFRESH    = _IO('$', 2),
+  PERF_EVENT_IOC_RESET      = _IO('$', 3),
+  PERF_EVENT_IOC_PERIOD     = _IOW('$', 4, "uint64"),
+  PERF_EVENT_IOC_SET_OUTPUT = _IO('$', 5),
+  PERF_EVENT_IOC_SET_FILTER = _IOW('$', 6, "uintptr"),
+  PERF_EVENT_IOC_ID         = _IOR('$', 7, "uint64_1"),
+  PERF_EVENT_IOC_SET_BPF    = _IOW('$', 8, "uint32"),
 
 -- allow user defined ioctls
   _IO = _IO,
diff --git a/syscall/linux/syscalls.lua b/syscall/linux/syscalls.lua
index 0c39d2cfee..3f843d4064 100644
--- a/syscall/linux/syscalls.lua
+++ b/syscall/linux/syscalls.lua
@@ -835,6 +835,93 @@ if C.bpf then
   end
 end
 
+-- Linux performance monitoring
+if C.perf_event_open then
+  -- Open perf event fd
+  -- @note see man 2 perf_event_open
+  -- @param attr `struct perf_event_attr[1]`; its `size` is filled in when left at 0
+  -- @param pid target pid (default: 0)
+  -- @param cpu target cpu (default: -1)
+  -- @param group_fd group leader fd (default: -1)
+  -- @param flags c.PERF_FLAG value (default: 0)
+  -- @return fd, err
+  function S.perf_event_open(attr, pid, cpu, group_fd, flags)
+    if attr[0].size == 0 then attr[0].size = ffi.sizeof(attr[0]) end
+    local fd = C.perf_event_open(attr, pid or 0, cpu or -1, group_fd or -1, c.PERF_FLAG[flags or 0])
+    if fd < 0 then
+      return nil, t.error(errno())
+    end
+    return retfd(fd)
+  end
+  -- Read the tracepoint configuration (see "/sys/kernel/debug/tracing/available_events")
+  -- @param event_path path to tracepoint (e.g. "/sys/kernel/debug/tracing/events/syscalls/sys_enter_write")
+  -- @return tp, err (e.g. 538, nil)
+  function S.perf_tracepoint(event_path)
+    local fd, err = S.open(event_path.."/id", c.O.RDONLY)
+    if not fd then return nil, err end
+    -- assign to the outer `err` so that a failed read is reported to the caller
+    local ret
+    ret, err = fd:read(nil, 256)
+    fd:close()
+    if not ret then return nil, err end
+    return tonumber(ret)
+  end
+  -- Attach or detach a probe, same semantics as Lua tables.
+  -- See https://www.kernel.org/doc/Documentation/trace/kprobetrace.txt
+  -- (When the definition is not nil/false, it will be created, otherwise it will be detached)
+  -- @param probe_type either "kprobe" or "uprobe", no other probe types are supported
+  -- @param name chosen probe name (e.g. "myprobe")
+  -- @param definition (set to nil to disable probe) (e.g. "do_sys_open $retval")
+  -- @param retval true/false if this should be entrypoint probe or return probe
+  -- @return tp, err when attaching (e.g. 1099, nil); result of the write, err when detaching
+  function S.perf_probe(probe_type, name, definition, retval)
+    local event_path = string.format('/sys/kernel/debug/tracing/%s_events', probe_type)
+    local probe_path = string.format('/sys/kernel/debug/tracing/events/%ss/%s', probe_type, name)
+    local attach = definition and true or false
+    -- Check if probe already exists
+    if attach and S.statfs(probe_path) then return nil, t.error(c.E.EEXIST) end
+    local fd, err = S.open(event_path, "wronly, append")
+    if not fd then return nil, err end
+    -- Format a probe definition
+    local command
+    if attach then
+      command = string.format("%s:%s %s", retval and "r" or "p", name, definition)
+    else
+      command = "-:"..name -- Detach
+    end
+    local ok, werr = fd:write(command)
+    fd:close()
+    -- Only a newly attached probe has a tracepoint id to look up; testing the
+    -- command string here would always be true and make a successful detach fail
+    if ok and attach then
+      return S.perf_tracepoint(probe_path)
+    end
+    return ok, werr
+  end
+  -- Attach perf event reader to tracepoint (see "/sys/kernel/debug/tracing/available_events")
+  -- @param tp tracepoint identifier (e.g.: 538, use `S.perf_tracepoint()`)
+  -- @param pid passed to `S.perf_event_open()`
+  -- @param cpu passed to `S.perf_event_open()`
+  -- @param group_fd passed to `S.perf_event_open()`
+  -- @param attrs table of attributes overriding the defaults (e.g. {sample_type="raw, callchain"}, see `struct perf_event_attr`)
+  -- @return reader, err
+  function S.perf_attach_tracepoint(tp, pid, cpu, group_fd, attrs)
+    local pe = t.perf_event_attr1()
+    pe[0].type = "tracepoint"
+    pe[0].config = tp
+    pe[0].sample_type = "raw"
+    pe[0].sample_period = 1
+    pe[0].wakeup_events = 1
+    if attrs then
+      for k,v in pairs(attrs) do pe[0][k] = v end
+    end
+    -- Open perf event reader with given parameters
+    local fd, err = S.perf_event_open(pe, pid, cpu, group_fd, "fd_cloexec")
+    if not fd then return nil, err end
+    return t.perf_reader(fd)
+  end
+end
+
 return S
 
 end
diff --git a/syscall/linux/types.lua b/syscall/linux/types.lua
index e6f50ab44a..efff0a14cd 100644
--- a/syscall/linux/types.lua
+++ b/syscall/linux/types.lua
@@ -137,10 +137,12 @@ for k, v in pairs(addstructs) do addtype(types, k, v, lenmt) end
 -- these ones not in table as not helpful with vararg or arrays TODO add more addtype variants
 t.inotify_event = ffi.typeof("struct inotify_event")
 pt.inotify_event = ptt("struct inotify_event") -- still need pointer to this
+pt.perf_event_header = ptt("struct perf_event_header")
 
 t.aio_context1 = ffi.typeof("aio_context_t[1]")
 t.sock_fprog1 = ffi.typeof("struct sock_fprog[1]")
 t.bpf_attr1 = ffi.typeof("union bpf_attr[1]")
+t.perf_event_attr1 = ffi.typeof("struct perf_event_attr[1]")
 
 t.user_cap_data2 = ffi.typeof("struct user_cap_data[2]")
 
@@ -1177,6 +1179,21 @@ addtype_var(types, "mmsghdrs", "struct {int count; struct mmsghdr msg[?];}", mt.
addtype(types, "bpf_attr", "union bpf_attr") +-- Metatype for Linux perf events +mt.perf_event_attr = { + index = { + type = function(self) return self.pe_type end, + config = function(self) return self.pe_config end, + sample_type = function(self) return self.pe_sample_type end, + }, + newindex = { + type = function(self, v) self.pe_type = c.PERF_TYPE[v] end, + config = function(self, v) self.pe_config = c.PERF_COUNT[v] end, + sample_type = function(self, v) self.pe_sample_type = c.PERF_SAMPLE[v] end, + }, +} +addtype(types, "perf_event_attr", "struct perf_event_attr", mt.perf_event_attr) + -- this is declared above samap_pt = { [c.AF.UNIX] = pt.sockaddr_un, diff --git a/syscall/methods.lua b/syscall/methods.lua index fb2dcd36f5..51b04e51bf 100644 --- a/syscall/methods.lua +++ b/syscall/methods.lua @@ -205,6 +205,89 @@ t.timer = metatype("struct {timer_t timerid[1];}", { --__gc = S.timer_delete, }) +if abi.os == "linux" then + -- Linux performance monitoring reader + t.perf_reader = metatype("struct {int fd; char *map; size_t map_pages; }", { + __new = function (ct, fd) + if not fd then return ffi.new(ct) end + if istype(t.fd, fd) then fd = fd:nogc():getfd() end + return ffi.new(ct, fd) + end, + __len = function(t) return ffi.sizeof(t) end, + __gc = function (t) t:close() end, + __index = { + close = function(t) + t:munmap() + if t.fd > 0 then S.close(t.fd) end + end, + munmap = function (t) + if t.map_pages > 0 then + S.munmap(t.map, (t.map_pages + 1) * S.getpagesize()) + t.map_pages = 0 + end + end, + -- read(2) interface, see `perf_attr.read_format` + -- @return u64 or an array of u64 + read = function (t, len) + local rvals = ffi.new('uint64_t [4]') + local nb, err = S.read(t.fd, rvals, len or ffi.sizeof(rvals)) + if not nb then return nil, err end + return nb == 8 and rvals[0] or rvals + end, + -- mmap(2) interface, see sampling interface (`perf_attr.sample_type` and `perf_attr.mmap`) + -- first page is metadata page, the others are sample_type dependent + 
mmap = function (t, pages) + t:munmap() + pages = pages or 8 + local map, err = S.mmap(nil, (pages + 1) * S.getpagesize(), "read, write", "shared", t.fd, 0) + if not map then return nil, err end + t.map = map + t.map_pages = pages + return pages + end, + meta = function (t) + return t.map_pages > 0 and ffi.cast("struct perf_event_mmap_page *", t.map) or nil + end, + -- next() function for __ipairs returning (len, event) pairs + -- it only retires read events when current event length is passed + next = function (t, curlen) + local buffer_size = S.getpagesize() * t.map_pages + local base = t.map + S.getpagesize() + local meta = t:meta() + -- Retire last read event or start iterating + if curlen then + meta.data_tail = meta.data_tail + curlen + end + -- End of ring buffer, yield + -- TODO: + if meta.data_head == meta.data_tail then + return + end + local e = pt.perf_event_header(base + (meta.data_tail % buffer_size)) + local e_end = base + (meta.data_tail + e.size) % buffer_size; + -- If the perf event wraps around the ring, we need to make a contiguous copy + if ffi.cast("uintptr_t", e_end) < ffi.cast("uintptr_t", e) then + local tmp_e = ffi.new("char [?]", e.size) + local len = (base + buffer_size) - ffi.cast('char *', e) + ffi.copy(tmp_e, e, len) + ffi.copy(tmp_e + len, base, e.size - len) + e = ffi.cast(ffi.typeof(e), tmp_e) + end + return e.size, e + end, + -- Various ioctl() wrappers + ioctl = function(t, cmd, val) return S.ioctl(t.fd, cmd, val or 0) end, + start = function(t) return t:ioctl("PERF_EVENT_IOC_ENABLE") end, + stop = function(t) return t:ioctl("PERF_EVENT_IOC_DISABLE") end, + refresh = function(t) return t:ioctl("PERF_EVENT_IOC_REFRESH") end, + reset = function(t) return t:ioctl("PERF_EVENT_IOC_RESET") end, + setfilter = function(t, val) return t:ioctl("PERF_EVENT_IOC_SET_FILTER", val) end, + setbpf = function(t, fd) return t:ioctl("PERF_EVENT_IOC_SET_BPF", pt.void(fd)) end, + }, + __ipairs = function(t) return t.next, t, nil end + }) +end + -- 
TODO reinstate this, more like fd is, hence changes to destroy --[[ t.aio_context = metatype("struct {aio_context_t ctx;}", { diff --git a/test/ctest-linux.lua b/test/ctest-linux.lua index d8ae7d3d73..41f447bff6 100644 --- a/test/ctest-linux.lua +++ b/test/ctest-linux.lua @@ -90,6 +90,30 @@ c.BPF.JNE = nil c.BPF.MOV = nil c.SYS.bpf = nil +-- no perf_event_open on Travis CI +ctypes["struct perf_event_attr"] = nil +ctypes["struct perf_event_reader"] = nil +ctypes["struct perf_event_header"] = nil +ctypes["struct perf_event_mmap_page"] = nil +c.PERF_TYPE = {} +c.PERF_COUNT = {} +c.PERF_SAMPLE = {} +c.PERF_FLAG = {} +c.PERF_SAMPLE_REGS = {} +c.PERF_SAMPLE_BRANCH = {} +c.PERF_READ_FORMAT = {} +c.PERF_RECORD = {} +-- no perf_event_open ioctls on Travis CI +c.IOCTL.PERF_EVENT_IOC_ENABLE = nil +c.IOCTL.PERF_EVENT_IOC_DISABLE = nil +c.IOCTL.PERF_EVENT_IOC_REFRESH = nil +c.IOCTL.PERF_EVENT_IOC_RESET = nil +c.IOCTL.PERF_EVENT_IOC_PERIOD = nil +c.IOCTL.PERF_EVENT_IOC_SET_OUTPUT = nil +c.IOCTL.PERF_EVENT_IOC_SET_FILTER = nil +c.IOCTL.PERF_EVENT_IOC_ID = nil +c.IOCTL.PERF_EVENT_IOC_SET_BPF = nil + if abi.arch == "arm" then ctypes["struct statfs64"] = nil end -- padding difference, not that important for k, v in pairs(c.IOCTL) do if type(v) == "table" then c.IOCTL[k] = v.number end end diff --git a/test/linux-constants.lua b/test/linux-constants.lua index d1b8295d2a..c81da3e0d5 100644 --- a/test/linux-constants.lua +++ b/test/linux-constants.lua @@ -238,6 +238,15 @@ local function fixup_constants(abi, c) c.BPF.ANY = nil c.BPF.EXIST = nil c.BPF.NOEXIST = nil + -- no perf_event_open on Travis CI + c.PERF_TYPE = {} + c.PERF_COUNT = {} + c.PERF_SAMPLE = {} + c.PERF_FLAG = {} + c.PERF_SAMPLE_REGS = {} + c.PERF_SAMPLE_BRANCH = {} + c.PERF_READ_FORMAT = {} + c.PERF_RECORD = {} return c end diff --git a/test/linux-structures.lua b/test/linux-structures.lua index 8793e9f6ee..4fb06593fc 100644 --- a/test/linux-structures.lua +++ b/test/linux-structures.lua @@ -28,6 +28,7 @@ local function 
fixup_structs(abi, ctypes) ctypes["struct capabilities"] = nil ctypes["struct cap"] = nil ctypes["struct {dev_t dev;}"] = nil + ctypes["struct perf_event_reader"] = nil -- standard headers use __kernel types for these or just fixed sizes ctypes.ino_t = nil @@ -70,7 +71,9 @@ local function fixup_structs(abi, ctypes) ctypes["struct mmsghdr"] = nil -- too new for our headers ctypes["union bpf_attr"] = nil -- too new for our headers ctypes["struct bpf_insn"] = nil -- too new for our headers - + ctypes["struct perf_event_attr"] = nil -- too new for our headers + ctypes["struct perf_event_header"] = nil -- too new for our headers + ctypes["struct perf_event_mmap_page"] = nil -- too new for our headers ctypes["sigset_t"] = nil -- still some issues return ctypes diff --git a/test/linux.lua b/test/linux.lua index 0cfe2dd25c..391d25c463 100644 --- a/test/linux.lua +++ b/test/linux.lua @@ -1338,6 +1338,121 @@ if S.bpf and not S.__rump then end end +-- test perf_event_open +if S.perf_event_open and not S.__rump then + test.perf_root = {} + test.perf_root.test_perf_open = function () + -- Create perf event attribute with dummy config + local pe = t.perf_event_attr1() + pe[0].type = "software" + pe[0].config = "sw_dummy" + pe[0].disabled = 1 + pe[0].exclude_kernel = 1 + pe[0].exclude_hv = 1 + -- Open event and read a dummy value + local fd = S.perf_event_open(pe) + fd:ioctl("PERF_EVENT_IOC_ENABLE", 0) + local count = t.buffer(ffi.sizeof('int64_t')) + local rb = fd:read(count, ffi.sizeof(count)) + fd:ioctl("PERF_EVENT_IOC_DISABLE", 0) + fd:close() + -- Check just the size of read count + assert(rb == ffi.sizeof(count)) + end + test.perf_root.test_perf_sw = function () + -- Read out a software perf counter + local pe = t.perf_event_attr1() + pe[0].type = "software" + pe[0].config = "sw_cpu_clock" + pe[0].exclude_kernel = 1 + pe[0].exclude_hv = 1 + -- Open event and read a dummy value + -- @note perf event fd has CLO_EXEC, must not fork + local reader = 
t.perf_reader(S.perf_event_open(pe)) + reader:start() + local ticks = reader:read() + reader:close() + -- Check just the size of read count + assert(ticks > 0) + end + test.perf_root.test_perf_attach = function () + if not S.statfs("/sys/kernel/debug/tracing/events") then + print('skipping') -- debugfs must be mounted + return + end + -- Get tracepoint id + local tp = assert(S.perf_tracepoint("/sys/kernel/debug/tracing/events/syscalls/sys_enter_getcwd")) + local reader = S.perf_attach_tracepoint(tp) + -- Trace getcwd() syscall + reader:start() + S.getcwd() + S.getcwd() + local cnt = reader:read() + reader:stop() + reader:close() + -- Check value + assert(cnt == 2) + end + test.perf_root.test_perf_sampling = function () + if not S.statfs("/sys/kernel/debug/tracing/events") then + print('skipping') -- debugfs must be mounted + return + end + local sample_t = ffi.typeof [[ + struct { + struct perf_event_header header; + uint32_t size; + struct { + uint16_t id; + uint8_t flags; + uint8_t preempt_count; + int pid; + }; + uint64_t ip; + } * + ]] + -- Get tracepoint id + local tp = assert(S.perf_tracepoint("/sys/kernel/debug/tracing/events/syscalls/sys_enter_getcwd")) + local reader = S.perf_attach_tracepoint(tp) + -- Trace getcwd() syscall + reader:mmap() + reader:start() + for i = 1,10 do S.getcwd() end + reader:stop() + -- Read samples from mmap + local cnt = 0; + for len,e in ipairs(reader) do + if e.type ~= c.PERF_RECORD.SAMPLE then break end + -- Check if we're the caller + e = ffi.cast(sample_t, e) + if e.pid == S.getpid() then + cnt = cnt + 1 + end + end + reader:close() + -- Check if we got all samples + assert(cnt == 10) + end + test.perf_root.test_perf_kprobe = function () + if not S.statfs("/sys/kernel/debug/tracing/events") then + print('skipping') -- debugfs must be mounted + return + end + -- Attach a kprobe to open() + local tp = assert(S.perf_probe("kprobe", "myprobe", "do_sys_open $retval", true)) + local reader = S.perf_attach_tracepoint(tp) + 
reader:start() + S.open("/tmp", "rdonly") + local cnt = reader:read() + reader:stop() + reader:close() + -- Detach probe + S.perf_probe("kprobe", "myprobe", false) + -- See if we hit the probe + assert(cnt == 1) + end +end + -- TODO remove arch tests. Unclear if my ppc/arm does not support or a bug, retest later with newer kernel -- still ppc issues with 3.12.6 ppc, need to debug more, and mips issues if not (abi.arch == "ppc64le" or abi.arch == "ppc" or abi.arch == "mips" or S.__rump) then -- cannot test on rump as uses clone() diff --git a/test/test.lua b/test/test.lua index e5b19504af..ca35cebd68 100644 --- a/test/test.lua +++ b/test/test.lua @@ -2494,7 +2494,8 @@ if S.geteuid() == 0 then local i = assert(nl.interfaces()) local lo = assert(i.lo) assert(lo:up()) - assert(S.mount("none", "/sys", "sysfs")) + -- Do not destroy "/sys" if it is mounted + assert(S.statfs("/sys/kernel") or S.mount("none", "/sys", "sysfs")) end else -- not Linux -- run all tests, no namespaces available