-
Notifications
You must be signed in to change notification settings - Fork 54.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
BPF syscall is a multiplexor for a range of different operations on eBPF.
This patch introduces syscall with single command to create a map.
Next patch adds commands to access maps.

'maps' is a generic storage of different types for sharing data between kernel
and userspace.

Userspace example:

/* this syscall wrapper creates a map with given type and attributes
 * and returns map_fd on success.
 * use close(map_fd) to delete the map
 */
int bpf_create_map(enum bpf_map_type map_type, int key_size,
		   int value_size, int max_entries)
{
	union bpf_attr attr = {
		.map_type = map_type,
		.key_size = key_size,
		.value_size = value_size,
		.max_entries = max_entries
	};

	return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}

'union bpf_attr' is backwards compatible with future extensions.

More details in Documentation/networking/filter.txt and in manpage

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
- Loading branch information
Showing
5 changed files
with
273 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com | ||
* | ||
* This program is free software; you can redistribute it and/or | ||
* modify it under the terms of version 2 of the GNU General Public | ||
* License as published by the Free Software Foundation. | ||
*/ | ||
#ifndef _LINUX_BPF_H | ||
#define _LINUX_BPF_H 1 | ||
|
||
#include <uapi/linux/bpf.h> | ||
#include <linux/workqueue.h> | ||
|
||
struct bpf_map; | ||
|
||
/* A map is generic key/value storage, optionally accessible by eBPF programs.
 *
 * struct bpf_map_ops is the set of callbacks a map implementation provides.
 */
struct bpf_map_ops {
	/* funcs callable from userspace (via syscall) */

	/* allocate a map described by @attr; result is checked with IS_ERR() */
	struct bpf_map *(*map_alloc)(union bpf_attr *attr);

	/* implementation dependent freeing of @map */
	void (*map_free)(struct bpf_map *map);
};
|
||
struct bpf_map { | ||
atomic_t refcnt; | ||
enum bpf_map_type map_type; | ||
u32 key_size; | ||
u32 value_size; | ||
u32 max_entries; | ||
struct bpf_map_ops *ops; | ||
struct work_struct work; | ||
}; | ||
|
||
struct bpf_map_type_list { | ||
struct list_head list_node; | ||
struct bpf_map_ops *ops; | ||
enum bpf_map_type type; | ||
}; | ||
|
||
/* register a map implementation so it can be selected by map type */
void bpf_register_map_type(struct bpf_map_type_list *tl);

/* drop one reference to @map */
void bpf_map_put(struct bpf_map *map);
|
||
#endif /* _LINUX_BPF_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
obj-y := core.o | ||
obj-y := core.o syscall.o |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com | ||
* | ||
* This program is free software; you can redistribute it and/or | ||
* modify it under the terms of version 2 of the GNU General Public | ||
* License as published by the Free Software Foundation. | ||
* | ||
* This program is distributed in the hope that it will be useful, but | ||
* WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
* General Public License for more details. | ||
*/ | ||
#include <linux/bpf.h> | ||
#include <linux/syscalls.h> | ||
#include <linux/slab.h> | ||
#include <linux/anon_inodes.h> | ||
|
||
static LIST_HEAD(bpf_map_types); | ||
|
||
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) | ||
{ | ||
struct bpf_map_type_list *tl; | ||
struct bpf_map *map; | ||
|
||
list_for_each_entry(tl, &bpf_map_types, list_node) { | ||
if (tl->type == attr->map_type) { | ||
map = tl->ops->map_alloc(attr); | ||
if (IS_ERR(map)) | ||
return map; | ||
map->ops = tl->ops; | ||
map->map_type = attr->map_type; | ||
return map; | ||
} | ||
} | ||
return ERR_PTR(-EINVAL); | ||
} | ||
|
||
/* boot time registration of different map implementations */ | ||
void bpf_register_map_type(struct bpf_map_type_list *tl) | ||
{ | ||
list_add(&tl->list_node, &bpf_map_types); | ||
} | ||
|
||
/* called from workqueue */ | ||
static void bpf_map_free_deferred(struct work_struct *work) | ||
{ | ||
struct bpf_map *map = container_of(work, struct bpf_map, work); | ||
|
||
/* implementation dependent freeing */ | ||
map->ops->map_free(map); | ||
} | ||
|
||
/* decrement map refcnt and schedule it for freeing via workqueue | ||
* (unrelying map implementation ops->map_free() might sleep) | ||
*/ | ||
void bpf_map_put(struct bpf_map *map) | ||
{ | ||
if (atomic_dec_and_test(&map->refcnt)) { | ||
INIT_WORK(&map->work, bpf_map_free_deferred); | ||
schedule_work(&map->work); | ||
} | ||
} | ||
|
||
static int bpf_map_release(struct inode *inode, struct file *filp) | ||
{ | ||
struct bpf_map *map = filp->private_data; | ||
|
||
bpf_map_put(map); | ||
return 0; | ||
} | ||
|
||
static const struct file_operations bpf_map_fops = { | ||
.release = bpf_map_release, | ||
}; | ||
|
||
/* helper macro to check that unused fields of 'union bpf_attr' are zero
 *
 * Expects a 'union bpf_attr *attr' in the expanding scope and a
 * CMD##_LAST_FIELD macro naming the last field that CMD uses. Evaluates to
 * non-zero when any byte of *attr located after that field is non-zero.
 *
 * The whole expansion is parenthesized so that it can be negated or combined
 * with other operators without precedence surprises.
 */
#define CHECK_ATTR(CMD) \
	(memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		    sizeof(attr->CMD##_LAST_FIELD), 0, \
		    sizeof(*attr) - \
		    offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		    sizeof(attr->CMD##_LAST_FIELD)) != NULL)
|
||
#define BPF_MAP_CREATE_LAST_FIELD max_entries | ||
/* called via syscall */ | ||
static int map_create(union bpf_attr *attr) | ||
{ | ||
struct bpf_map *map; | ||
int err; | ||
|
||
err = CHECK_ATTR(BPF_MAP_CREATE); | ||
if (err) | ||
return -EINVAL; | ||
|
||
/* find map type and init map: hashtable vs rbtree vs bloom vs ... */ | ||
map = find_and_alloc_map(attr); | ||
if (IS_ERR(map)) | ||
return PTR_ERR(map); | ||
|
||
atomic_set(&map->refcnt, 1); | ||
|
||
err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); | ||
|
||
if (err < 0) | ||
/* failed to allocate fd */ | ||
goto free_map; | ||
|
||
return err; | ||
|
||
free_map: | ||
map->ops->map_free(map); | ||
return err; | ||
} | ||
|
||
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) | ||
{ | ||
union bpf_attr attr = {}; | ||
int err; | ||
|
||
/* the syscall is limited to root temporarily. This restriction will be | ||
* lifted when security audit is clean. Note that eBPF+tracing must have | ||
* this restriction, since it may pass kernel data to user space | ||
*/ | ||
if (!capable(CAP_SYS_ADMIN)) | ||
return -EPERM; | ||
|
||
if (!access_ok(VERIFY_READ, uattr, 1)) | ||
return -EFAULT; | ||
|
||
if (size > PAGE_SIZE) /* silly large */ | ||
return -E2BIG; | ||
|
||
/* If we're handed a bigger struct than we know of, | ||
* ensure all the unknown bits are 0 - i.e. new | ||
* user-space does not rely on any kernel feature | ||
* extensions we dont know about yet. | ||
*/ | ||
if (size > sizeof(attr)) { | ||
unsigned char __user *addr; | ||
unsigned char __user *end; | ||
unsigned char val; | ||
|
||
addr = (void __user *)uattr + sizeof(attr); | ||
end = (void __user *)uattr + size; | ||
|
||
for (; addr < end; addr++) { | ||
err = get_user(val, addr); | ||
if (err) | ||
return err; | ||
if (val) | ||
return -E2BIG; | ||
} | ||
size = sizeof(attr); | ||
} | ||
|
||
/* copy attributes from user space, may be less than sizeof(bpf_attr) */ | ||
if (copy_from_user(&attr, uattr, size) != 0) | ||
return -EFAULT; | ||
|
||
switch (cmd) { | ||
case BPF_MAP_CREATE: | ||
err = map_create(&attr); | ||
break; | ||
default: | ||
err = -EINVAL; | ||
break; | ||
} | ||
|
||
return err; | ||
} |