Skip to content

Commit

Permalink
bpf: direct packet write and access for helpers for clsact progs
Browse files Browse the repository at this point in the history
This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0 ("bpf: enable direct packet data write for xdp progs") and
6841de8 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.

For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.

At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.

For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
borkmann authored and davem330 committed Sep 21, 2016
1 parent b399cf6 commit 36bbef5
Show file tree
Hide file tree
Showing 6 changed files with 196 additions and 34 deletions.
4 changes: 3 additions & 1 deletion include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ enum bpf_return_type {
struct bpf_func_proto {
u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
bool gpl_only;
bool pkt_access;
enum bpf_return_type ret_type;
enum bpf_arg_type arg1_type;
enum bpf_arg_type arg2_type;
Expand Down Expand Up @@ -151,7 +152,8 @@ struct bpf_verifier_ops {
*/
bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
enum bpf_reg_type *reg_type);

int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
const struct bpf_prog *prog);
u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg,
int src_reg, int ctx_off,
struct bpf_insn *insn, struct bpf_prog *prog);
Expand Down
14 changes: 12 additions & 2 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -676,13 +676,23 @@ struct sk_buff {
*/
kmemcheck_bitfield_begin(flags1);
__u16 queue_mapping;

/* if you move cloned around you also must adapt those constants */
#ifdef __BIG_ENDIAN_BITFIELD
#define CLONED_MASK (1 << 7)
#else
#define CLONED_MASK 1
#endif
#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset)

__u8 __cloned_offset[0];
__u8 cloned:1,
nohdr:1,
fclone:2,
peeked:1,
head_frag:1,
xmit_more:1;
/* one bit hole */
xmit_more:1,
__unused:1; /* one bit hole */
kmemcheck_bitfield_end(flags1);

/* fields enclosed in headers_start/headers_end are copied
Expand Down
21 changes: 21 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,27 @@ enum bpf_func_id {
*/
BPF_FUNC_skb_change_tail,

/**
* bpf_skb_pull_data(skb, len)
* The helper will pull in non-linear data in case the
* skb is non-linear and not all of len are part of the
* linear section. Only needed for read/write with direct
* packet access.
* @skb: pointer to skb
* @len: len to make read/writeable
* Return: 0 on success or negative error
*/
BPF_FUNC_skb_pull_data,

/**
* bpf_csum_update(skb, csum)
* Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
* @skb: pointer to skb
* @csum: csum to add
* Return: csum on success or negative error
*/
BPF_FUNC_csum_update,

__BPF_FUNC_MAX_ID,
};

Expand Down
3 changes: 3 additions & 0 deletions kernel/bpf/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
const struct bpf_func_proto bpf_map_lookup_elem_proto = {
.func = bpf_map_lookup_elem,
.gpl_only = false,
.pkt_access = true,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_MAP_KEY,
Expand All @@ -51,6 +52,7 @@ BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
const struct bpf_func_proto bpf_map_update_elem_proto = {
.func = bpf_map_update_elem,
.gpl_only = false,
.pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_MAP_KEY,
Expand All @@ -67,6 +69,7 @@ BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
const struct bpf_func_proto bpf_map_delete_elem_proto = {
.func = bpf_map_delete_elem,
.gpl_only = false,
.pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_MAP_KEY,
Expand Down
54 changes: 40 additions & 14 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ struct verifier_env {
u32 used_map_cnt; /* number of used maps */
u32 id_gen; /* used to generate unique reg IDs */
bool allow_ptr_leaks;
bool seen_direct_write;
};

#define BPF_COMPLEXITY_LIMIT_INSNS 65536
Expand All @@ -204,6 +205,7 @@ struct verifier_env {
struct bpf_call_arg_meta {
struct bpf_map *map_ptr;
bool raw_mode;
bool pkt_access;
int regno;
int access_size;
};
Expand Down Expand Up @@ -654,10 +656,17 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off,

#define MAX_PACKET_OFF 0xffff

static bool may_write_pkt_data(enum bpf_prog_type type)
static bool may_access_direct_pkt_data(struct verifier_env *env,
const struct bpf_call_arg_meta *meta)
{
switch (type) {
switch (env->prog->type) {
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
case BPF_PROG_TYPE_XDP:
if (meta)
return meta->pkt_access;

env->seen_direct_write = true;
return true;
default:
return false;
Expand Down Expand Up @@ -817,7 +826,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
err = check_stack_read(state, off, size, value_regno);
}
} else if (state->regs[regno].type == PTR_TO_PACKET) {
if (t == BPF_WRITE && !may_write_pkt_data(env->prog->type)) {
if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) {
verbose("cannot write into packet\n");
return -EACCES;
}
Expand Down Expand Up @@ -950,8 +959,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
return 0;
}

if (type == PTR_TO_PACKET && !may_write_pkt_data(env->prog->type)) {
verbose("helper access to the packet is not allowed for clsact\n");
if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) {
verbose("helper access to the packet is not allowed\n");
return -EACCES;
}

Expand Down Expand Up @@ -1191,6 +1200,7 @@ static int check_call(struct verifier_env *env, int func_id)
changes_data = bpf_helper_changes_skb_data(fn->func);

memset(&meta, 0, sizeof(meta));
meta.pkt_access = fn->pkt_access;

/* We only support one arg being in raw mode at the moment, which
* is sufficient for the helper functions we have right now.
Expand Down Expand Up @@ -2675,18 +2685,35 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
*/
static int convert_ctx_accesses(struct verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
struct bpf_insn insn_buf[16];
const struct bpf_verifier_ops *ops = env->prog->aux->ops;
struct bpf_insn insn_buf[16], *insn;
struct bpf_prog *new_prog;
enum bpf_access_type type;
int i;
int i, insn_cnt, cnt;

if (!env->prog->aux->ops->convert_ctx_access)
if (ops->gen_prologue) {
cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
env->prog);
if (cnt >= ARRAY_SIZE(insn_buf)) {
verbose("bpf verifier is misconfigured\n");
return -EINVAL;
} else if (cnt) {
new_prog = bpf_patch_insn_single(env->prog, 0,
insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
env->prog = new_prog;
}
}

if (!ops->convert_ctx_access)
return 0;

insn_cnt = env->prog->len;
insn = env->prog->insnsi;

for (i = 0; i < insn_cnt; i++, insn++) {
u32 insn_delta, cnt;
u32 insn_delta;

if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
Expand All @@ -2703,9 +2730,8 @@ static int convert_ctx_accesses(struct verifier_env *env)
continue;
}

cnt = env->prog->aux->ops->
convert_ctx_access(type, insn->dst_reg, insn->src_reg,
insn->off, insn_buf, env->prog);
cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg,
insn->off, insn_buf, env->prog);
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
verbose("bpf verifier is misconfigured\n");
return -EINVAL;
Expand Down
Loading

0 comments on commit 36bbef5

Please sign in to comment.