Skip to content

Commit f1a2e44

Browse files
mauriciovasquezbernalAlexei Starovoitov
authored andcommitted
bpf: add queue and stack maps
Queue/stack maps implement a FIFO/LIFO data storage for eBPF programs. These maps support peek, pop and push operations that are exposed to eBPF programs through the new bpf_map[peek/pop/push] helpers. Those operations are exposed to userspace applications through the already existing syscalls in the following way: BPF_MAP_LOOKUP_ELEM -> peek, BPF_MAP_LOOKUP_AND_DELETE_ELEM -> pop, BPF_MAP_UPDATE_ELEM -> push. Queue/stack maps are implemented using a buffer and tail and head indexes, hence BPF_F_NO_PREALLOC is not supported. As opposed to other maps, queue and stack maps do not use RCU for protecting map values; the bpf_map[peek/pop] helpers have an ARG_PTR_TO_UNINIT_MAP_VALUE argument that is a pointer to a memory zone where to save the value of a map. It is basically the same as ARG_PTR_TO_UNINIT_MEM, but the size does not have to be passed as an extra argument. Our main motivation for implementing queue/stack maps was to keep track of a pool of elements, like network ports in a SNAT; however, we foresee other use cases, for example saving the last N kernel events in a map and then analysing them from userspace. Signed-off-by: Mauricio Vasquez B <[email protected]> Acked-by: Song Liu <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 2ea864c commit f1a2e44

File tree

10 files changed

+401
-3
lines changed

10 files changed

+401
-3
lines changed

include/linux/bpf.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ struct bpf_map_ops {
3939
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
4040
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
4141
int (*map_delete_elem)(struct bpf_map *map, void *key);
42+
int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
43+
int (*map_pop_elem)(struct bpf_map *map, void *value);
44+
int (*map_peek_elem)(struct bpf_map *map, void *value);
4245

4346
/* funcs called by prog_array and perf_event_array map */
4447
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -811,6 +814,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
811814
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
812815
extern const struct bpf_func_proto bpf_map_update_elem_proto;
813816
extern const struct bpf_func_proto bpf_map_delete_elem_proto;
817+
extern const struct bpf_func_proto bpf_map_push_elem_proto;
818+
extern const struct bpf_func_proto bpf_map_pop_elem_proto;
819+
extern const struct bpf_func_proto bpf_map_peek_elem_proto;
814820

815821
extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
816822
extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;

include/linux/bpf_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
6969
BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
7070
#endif
7171
#endif
72+
BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
73+
BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)

include/uapi/linux/bpf.h

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ enum bpf_map_type {
128128
BPF_MAP_TYPE_CGROUP_STORAGE,
129129
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
130130
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
131+
BPF_MAP_TYPE_QUEUE,
132+
BPF_MAP_TYPE_STACK,
131133
};
132134

133135
enum bpf_prog_type {
@@ -462,6 +464,28 @@ union bpf_attr {
462464
* Return
463465
* 0 on success, or a negative error in case of failure.
464466
*
467+
* int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
468+
* Description
469+
* Push an element *value* in *map*. *flags* is one of:
470+
*
471+
* **BPF_EXIST**
472+
* If the queue/stack is full, the oldest element is removed to
473+
* make room for this.
474+
* Return
475+
* 0 on success, or a negative error in case of failure.
476+
*
477+
* int bpf_map_pop_elem(struct bpf_map *map, void *value)
478+
* Description
479+
* Pop an element from *map*.
480+
* Return
481+
* 0 on success, or a negative error in case of failure.
482+
*
483+
* int bpf_map_peek_elem(struct bpf_map *map, void *value)
484+
* Description
485+
* Get an element from *map* without removing it.
486+
* Return
487+
* 0 on success, or a negative error in case of failure.
488+
*
465489
* int bpf_probe_read(void *dst, u32 size, const void *src)
466490
* Description
467491
* For tracing programs, safely attempt to read *size* bytes from
@@ -2303,7 +2327,10 @@ union bpf_attr {
23032327
FN(skb_ancestor_cgroup_id), \
23042328
FN(sk_lookup_tcp), \
23052329
FN(sk_lookup_udp), \
2306-
FN(sk_release),
2330+
FN(sk_release), \
2331+
FN(map_push_elem), \
2332+
FN(map_pop_elem), \
2333+
FN(map_peek_elem),
23072334

23082335
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
23092336
* function eBPF program intends to call

kernel/bpf/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ obj-y := core.o
33

44
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
55
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
6-
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o
6+
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
77
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
88
obj-$(CONFIG_BPF_SYSCALL) += btf.o
99
ifeq ($(CONFIG_NET),y)

kernel/bpf/core.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1783,6 +1783,9 @@ BPF_CALL_0(bpf_user_rnd_u32)
17831783
const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
17841784
const struct bpf_func_proto bpf_map_update_elem_proto __weak;
17851785
const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
1786+
const struct bpf_func_proto bpf_map_push_elem_proto __weak;
1787+
const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
1788+
const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
17861789

17871790
const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
17881791
const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;

kernel/bpf/helpers.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,49 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = {
7676
.arg2_type = ARG_PTR_TO_MAP_KEY,
7777
};
7878

79+
BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags) /* eBPF helper: push *value* into *map* */
80+
{
81+
return map->ops->map_push_elem(map, value, flags); /* forward to the map's own push op and return its result */
82+
}
83+
84+
const struct bpf_func_proto bpf_map_push_elem_proto = { /* prototype descriptor for the bpf_map_push_elem helper */
85+
.func = bpf_map_push_elem,
86+
.gpl_only = false,
87+
.pkt_access = true,
88+
.ret_type = RET_INTEGER,
89+
.arg1_type = ARG_CONST_MAP_PTR, /* the map to push into */
90+
.arg2_type = ARG_PTR_TO_MAP_VALUE, /* the value to push */
91+
.arg3_type = ARG_ANYTHING, /* flags, e.g. BPF_EXIST as described in the uapi helper documentation */
92+
};
93+
94+
BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value) /* eBPF helper: pop an element from *map* into *value* */
95+
{
96+
return map->ops->map_pop_elem(map, value); /* forward to the map's own pop op and return its result */
97+
}
98+
99+
const struct bpf_func_proto bpf_map_pop_elem_proto = { /* prototype descriptor for the bpf_map_pop_elem helper */
100+
.func = bpf_map_pop_elem,
101+
.gpl_only = false,
102+
.pkt_access = true,
103+
.ret_type = RET_INTEGER,
104+
.arg1_type = ARG_CONST_MAP_PTR, /* the map to pop from */
105+
.arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, /* memory where the value is saved; its size is not passed as an extra argument */
106+
};
107+
108+
BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) /* eBPF helper: get an element from *map* into *value* without removing it */
109+
{
110+
return map->ops->map_peek_elem(map, value); /* forward to the map's own peek op and return its result */
111+
}
112+
113+
const struct bpf_func_proto bpf_map_peek_elem_proto = {
114+
.func = bpf_map_pop_elem,
115+
.gpl_only = false,
116+
.pkt_access = true,
117+
.ret_type = RET_INTEGER,
118+
.arg1_type = ARG_CONST_MAP_PTR,
119+
.arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
120+
};
121+
79122
const struct bpf_func_proto bpf_get_prandom_u32_proto = {
80123
.func = bpf_user_rnd_u32,
81124
.gpl_only = false,

0 commit comments

Comments
 (0)