Skip to content

Commit

Permalink
bpf: Introduce bpf_sysctl_{get,set}_new_value helpers
Browse files Browse the repository at this point in the history
Add helpers to work with new value being written to sysctl by user
space.

bpf_sysctl_get_new_value() copies value being written to sysctl into
provided buffer.

bpf_sysctl_set_new_value() overrides the new value being written by user
space with the one from the provided buffer. The buffer should contain a
string representation of the value, similar to what can be seen in /proc/sys/.

Both helpers can be used only on sysctl write.

File position matters and can be managed by an interface that will be
introduced separately. E.g. if user space calls sys_write to a file in
/proc/sys/ at file position = X, where X > 0, then the value set by
bpf_sysctl_set_new_value() will be written starting from X. If program
wants to override whole value with specified buffer, file position has
to be set to zero.

Documentation for the new helpers is provided in bpf.h UAPI.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
rdna authored and Alexei Starovoitov committed Apr 12, 2019
1 parent 1d11b30 commit 4e63acd
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 10 deletions.
22 changes: 17 additions & 5 deletions fs/proc/proc_sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -570,8 +570,8 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
struct inode *inode = file_inode(filp);
struct ctl_table_header *head = grab_header(inode);
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
void *new_buf = NULL;
ssize_t error;
size_t res;

if (IS_ERR(head))
return PTR_ERR(head);
Expand All @@ -589,15 +589,27 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
if (!table->proc_handler)
goto out;

error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write);
error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
&new_buf);
if (error)
goto out;

/* careful: calling conventions are nasty here */
res = count;
error = table->proc_handler(table, write, buf, &res, ppos);
if (new_buf) {
mm_segment_t old_fs;

old_fs = get_fs();
set_fs(KERNEL_DS);
error = table->proc_handler(table, write, (void __user *)new_buf,
&count, ppos);
set_fs(old_fs);
kfree(new_buf);
} else {
error = table->proc_handler(table, write, buf, &count, ppos);
}

if (!error)
error = res;
error = count;
out:
sysctl_head_finish(head);

Expand Down
8 changes: 5 additions & 3 deletions include/linux/bpf-cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,

int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
struct ctl_table *table, int write,
enum bpf_attach_type type);
void __user *buf, size_t *pcount,
void **new_buf, enum bpf_attach_type type);

static inline enum bpf_cgroup_storage_type cgroup_storage_type(
struct bpf_map *map)
Expand Down Expand Up @@ -261,11 +262,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
})


#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) \
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, nbuf) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) \
__ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
buf, count, nbuf, \
BPF_CGROUP_SYSCTL); \
__ret; \
})
Expand Down Expand Up @@ -338,7 +340,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,nbuf) ({ 0; })

#define for_each_cgroup_storage_type(stype) for (; false; )

Expand Down
3 changes: 3 additions & 0 deletions include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -1184,6 +1184,9 @@ struct bpf_sysctl_kern {
struct ctl_table *table;
void *cur_val;
size_t cur_len;
void *new_val;
size_t new_len;
int new_updated;
int write;
};

Expand Down
38 changes: 37 additions & 1 deletion include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -2541,6 +2541,40 @@ union bpf_attr {
*
* **-EINVAL** if current value was unavailable, e.g. because
* sysctl is uninitialized and read returns -EIO for it.
*
* int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
* Description
* Get new value being written by user space to sysctl (before
* the actual write happens) and copy it as a string into
* the program-provided buffer *buf* of size *buf_len*.
*
* User space may write new value at file position > 0.
*
* The buffer is always NUL terminated, unless it's zero-sized.
* Return
* Number of characters copied (not including the trailing NUL).
*
* **-E2BIG** if the buffer wasn't big enough (*buf* will contain
* truncated value in this case).
*
* **-EINVAL** if sysctl is being read.
*
* int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
* Description
* Override new value being written by user space to sysctl with
* value provided by program in buffer *buf* of size *buf_len*.
*
* *buf* should contain a string in same form as provided by user
* space on sysctl write.
*
* User space may write new value at file position > 0. To override
* the whole sysctl value file position should be set to zero.
* Return
* 0 on success.
*
* **-E2BIG** if the *buf_len* is too big.
*
* **-EINVAL** if sysctl is being read.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
Expand Down Expand Up @@ -2645,7 +2679,9 @@ union bpf_attr {
FN(skc_lookup_tcp), \
FN(tcp_check_syncookie), \
FN(sysctl_get_name), \
FN(sysctl_get_current_value),
FN(sysctl_get_current_value), \
FN(sysctl_get_new_value), \
FN(sysctl_set_new_value),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
Expand Down
81 changes: 80 additions & 1 deletion kernel/bpf/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,13 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
* @head: sysctl table header
* @table: sysctl table
* @write: sysctl is being read (= 0) or written (= 1)
* @buf: pointer to buffer passed by user space
* @pcount: value-result argument: value is size of buffer pointed to by @buf,
* result is size of @new_buf if program set new value, initial value
* otherwise
* @new_buf: pointer to pointer to new buffer that will be allocated if program
* overrides new value provided by user space on sysctl write
* NOTE: it's the caller's responsibility to free *new_buf if it was set
* @type: type of program to be executed
*
* Program is run when sysctl is being accessed, either read or written, and
Expand All @@ -788,14 +795,18 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
*/
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
struct ctl_table *table, int write,
enum bpf_attach_type type)
void __user *buf, size_t *pcount,
void **new_buf, enum bpf_attach_type type)
{
struct bpf_sysctl_kern ctx = {
.head = head,
.table = table,
.write = write,
.cur_val = NULL,
.cur_len = PAGE_SIZE,
.new_val = NULL,
.new_len = 0,
.new_updated = 0,
};
struct cgroup *cgrp;
int ret;
Expand All @@ -818,13 +829,32 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
ctx.cur_len = 0;
}

if (write && buf && *pcount) {
/* BPF program should be able to override new value with a
* buffer bigger than provided by user.
*/
ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
ctx.new_len = min(PAGE_SIZE, *pcount);
if (!ctx.new_val ||
copy_from_user(ctx.new_val, buf, ctx.new_len))
/* Let BPF program decide how to proceed. */
ctx.new_len = 0;
}

rcu_read_lock();
cgrp = task_dfl_cgroup(current);
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
rcu_read_unlock();

kfree(ctx.cur_val);

if (ret == 1 && ctx.new_updated) {
*new_buf = ctx.new_val;
*pcount = ctx.new_len;
} else {
kfree(ctx.new_val);
}

return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
Expand Down Expand Up @@ -932,6 +962,51 @@ static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
.arg3_type = ARG_CONST_SIZE,
};

/*
 * bpf_sysctl_get_new_value() - hand the pending sysctl value to the program.
 * @ctx:     sysctl program context
 * @buf:     destination buffer supplied by the program
 * @buf_len: size of @buf in bytes
 *
 * On a sysctl write, the result of copy_sysctl_value() for ctx->new_val /
 * ctx->new_len is returned as-is. On a sysctl read there is no new value:
 * @buf (if non-NULL and non-empty) is zero-filled and -EINVAL is returned.
 */
BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len)
{
	if (ctx->write)
		return copy_sysctl_value(buf, buf_len, ctx->new_val,
					 ctx->new_len);

	/* Read access: clear the caller's buffer before reporting the error. */
	if (buf && buf_len)
		memset(buf, '\0', buf_len);

	return -EINVAL;
}

/*
 * Prototype descriptor for bpf_sysctl_get_new_value(); sysctl_func_proto()
 * returns it for BPF_FUNC_sysctl_get_new_value.
 *
 * The argument types line up with the helper's parameters in order:
 * arg1 = ctx (ARG_PTR_TO_CTX), arg2 = buf (ARG_PTR_TO_UNINIT_MEM, i.e. the
 * helper is the one filling the buffer), arg3 = buf_len (ARG_CONST_SIZE).
 * The helper returns an integer.
 */
static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
.func = bpf_sysctl_get_new_value,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE,
};

/*
 * bpf_sysctl_set_new_value() - replace the value being written to a sysctl.
 * @ctx:     sysctl program context
 * @buf:     buffer holding the replacement value
 * @buf_len: number of bytes of @buf to use
 *
 * Copies @buf_len bytes from @buf into ctx->new_val, records the new length
 * and flags the context as updated.
 *
 * Return: 0 on success; -EINVAL if the sysctl is not being written, if the
 * context has no new-value buffer or an empty one, or if @buf is NULL or
 * @buf_len is zero; -E2BIG if @buf_len exceeds PAGE_SIZE - 1.
 */
BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
	   const char *, buf, size_t, buf_len)
{
	/* Only meaningful on a write that already carries a new value. */
	if (!ctx->write || !ctx->new_val || !ctx->new_len)
		return -EINVAL;

	/* The replacement itself must be present and non-empty. */
	if (!buf || !buf_len)
		return -EINVAL;

	/* At most PAGE_SIZE - 1 bytes are accepted. */
	if (buf_len >= PAGE_SIZE)
		return -E2BIG;

	ctx->new_updated = 1;
	ctx->new_len = buf_len;
	memcpy(ctx->new_val, buf, buf_len);

	return 0;
}

/*
 * Prototype descriptor for bpf_sysctl_set_new_value(); sysctl_func_proto()
 * returns it for BPF_FUNC_sysctl_set_new_value.
 *
 * The argument types line up with the helper's parameters in order:
 * arg1 = ctx (ARG_PTR_TO_CTX), arg2 = buf (ARG_PTR_TO_MEM, the helper only
 * reads from it), arg3 = buf_len (ARG_CONST_SIZE).
 * The helper returns an integer.
 */
static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
.func = bpf_sysctl_set_new_value,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
};

static const struct bpf_func_proto *
sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
Expand All @@ -940,6 +1015,10 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sysctl_get_name_proto;
case BPF_FUNC_sysctl_get_current_value:
return &bpf_sysctl_get_current_value_proto;
case BPF_FUNC_sysctl_get_new_value:
return &bpf_sysctl_get_new_value_proto;
case BPF_FUNC_sysctl_set_new_value:
return &bpf_sysctl_set_new_value_proto;
default:
return cgroup_base_func_proto(func_id, prog);
}
Expand Down

0 comments on commit 4e63acd

Please sign in to comment.