
Commit 055eb95

Stanislav Fomichev authored and Alexei Starovoitov committed
bpf: Move rcu lock management out of BPF_PROG_RUN routines
Commit 7d08c2c ("bpf: Refactor BPF_PROG_RUN_ARRAY family of macros into
functions") switched a bunch of BPF_PROG_RUN macros to inline routines.
This changed the semantics a bit. Due to argument expansion of macros, it
used to be:

	rcu_read_lock();
	array = rcu_dereference(cgrp->bpf.effective[atype]);
	...

Now, with inline routines, we have:

	array_rcu = rcu_dereference(cgrp->bpf.effective[atype]);
	/* array_rcu can be kfree'd here */
	rcu_read_lock();
	array = rcu_dereference(array_rcu);

I'm assuming that in practice the RCU subsystem isn't fast enough to
trigger this, but let's use the RCU API properly. Also, rename to lower
case to avoid confusion with the macros. Additionally, drop and expand
BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY.

See [1] for more context.

[1] https://lore.kernel.org/bpf/CAKH8qBs60fOinFdxiiQikK_q0EcVxGvNTQoWvHLEUGbgcj1UYg@mail.gmail.com/T/#u

v2:
- keep rcu locks inside by passing cgroup_bpf

Fixes: 7d08c2c ("bpf: Refactor BPF_PROG_RUN_ARRAY family of macros into functions")
Signed-off-by: Stanislav Fomichev <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
Acked-by: Martin KaFai Lau <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
1 parent 2324257 commit 055eb95
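The hunks below all follow the same new calling convention: bpf_prog_run_array() now takes an already-dereferenced array pointer and merely asserts, via RCU_LOCKDEP_WARN(), that an RCU read-side critical section is held, so the caller owns the rcu_read_lock()/rcu_dereference() pairing. A minimal caller sketch of that convention follows; the wrapper name run_progs and its arguments are illustrative only and not part of this commit:

	/* Illustrative caller: dereference and run inside one RCU read-side
	 * critical section, mirroring the updated bpf-lirc.c and bpf_trace.c
	 * call sites in this commit.
	 */
	static u32 run_progs(struct bpf_prog_array __rcu *progs_rcu, void *ctx)
	{
		u32 ret;

		rcu_read_lock();	/* pin the array for the duration of the run */
		ret = bpf_prog_run_array(rcu_dereference(progs_rcu),
					 ctx, bpf_prog_run);
		rcu_read_unlock();

		return ret;
	}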

4 files changed: +124 −128 lines


drivers/media/rc/bpf-lirc.c

Lines changed: 6 additions & 2 deletions
@@ -216,8 +216,12 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample)
 
 	raw->bpf_sample = sample;
 
-	if (raw->progs)
-		BPF_PROG_RUN_ARRAY(raw->progs, &raw->bpf_sample, bpf_prog_run);
+	if (raw->progs) {
+		rcu_read_lock();
+		bpf_prog_run_array(rcu_dereference(raw->progs),
+				   &raw->bpf_sample, bpf_prog_run);
+		rcu_read_unlock();
+	}
 }
 
 /*

include/linux/bpf.h

Lines changed: 7 additions & 108 deletions
@@ -1221,7 +1221,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 /* an array of programs to be executed under rcu_lock.
  *
  * Typical usage:
- * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, bpf_prog_run);
+ * ret = bpf_prog_run_array(rcu_dereference(&bpf_prog_array), ctx, bpf_prog_run);
  *
  * the structure returned by bpf_prog_array_alloc() should be populated
  * with program pointers and the last pointer must be NULL.
@@ -1315,83 +1315,22 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
 
 typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
 
-static __always_inline int
-BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
-			    const void *ctx, bpf_prog_run_fn run_prog,
-			    int retval, u32 *ret_flags)
-{
-	const struct bpf_prog_array_item *item;
-	const struct bpf_prog *prog;
-	const struct bpf_prog_array *array;
-	struct bpf_run_ctx *old_run_ctx;
-	struct bpf_cg_run_ctx run_ctx;
-	u32 func_ret;
-
-	run_ctx.retval = retval;
-	migrate_disable();
-	rcu_read_lock();
-	array = rcu_dereference(array_rcu);
-	item = &array->items[0];
-	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
-	while ((prog = READ_ONCE(item->prog))) {
-		run_ctx.prog_item = item;
-		func_ret = run_prog(prog, ctx);
-		if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
-			run_ctx.retval = -EPERM;
-		*(ret_flags) |= (func_ret >> 1);
-		item++;
-	}
-	bpf_reset_run_ctx(old_run_ctx);
-	rcu_read_unlock();
-	migrate_enable();
-	return run_ctx.retval;
-}
-
-static __always_inline int
-BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
-		      const void *ctx, bpf_prog_run_fn run_prog,
-		      int retval)
-{
-	const struct bpf_prog_array_item *item;
-	const struct bpf_prog *prog;
-	const struct bpf_prog_array *array;
-	struct bpf_run_ctx *old_run_ctx;
-	struct bpf_cg_run_ctx run_ctx;
-
-	run_ctx.retval = retval;
-	migrate_disable();
-	rcu_read_lock();
-	array = rcu_dereference(array_rcu);
-	item = &array->items[0];
-	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
-	while ((prog = READ_ONCE(item->prog))) {
-		run_ctx.prog_item = item;
-		if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
-			run_ctx.retval = -EPERM;
-		item++;
-	}
-	bpf_reset_run_ctx(old_run_ctx);
-	rcu_read_unlock();
-	migrate_enable();
-	return run_ctx.retval;
-}
-
 static __always_inline u32
-BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
+bpf_prog_run_array(const struct bpf_prog_array *array,
 		   const void *ctx, bpf_prog_run_fn run_prog)
 {
 	const struct bpf_prog_array_item *item;
 	const struct bpf_prog *prog;
-	const struct bpf_prog_array *array;
 	struct bpf_run_ctx *old_run_ctx;
 	struct bpf_trace_run_ctx run_ctx;
 	u32 ret = 1;
 
-	migrate_disable();
-	rcu_read_lock();
-	array = rcu_dereference(array_rcu);
+	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu lock held");
+
 	if (unlikely(!array))
-		goto out;
+		return ret;
+
+	migrate_disable();
 	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
 	item = &array->items[0];
 	while ((prog = READ_ONCE(item->prog))) {
@@ -1400,50 +1339,10 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
 		item++;
 	}
 	bpf_reset_run_ctx(old_run_ctx);
-out:
-	rcu_read_unlock();
 	migrate_enable();
 	return ret;
 }
 
-/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
- * so BPF programs can request cwr for TCP packets.
- *
- * Current cgroup skb programs can only return 0 or 1 (0 to drop the
- * packet. This macro changes the behavior so the low order bit
- * indicates whether the packet should be dropped (0) or not (1)
- * and the next bit is a congestion notification bit. This could be
- * used by TCP to call tcp_enter_cwr()
- *
- * Hence, new allowed return values of CGROUP EGRESS BPF programs are:
- *   0: drop packet
- *   1: keep packet
- *   2: drop packet and cn
- *   3: keep packet and cn
- *
- * This macro then converts it to one of the NET_XMIT or an error
- * code that is then interpreted as drop packet (and no cn):
- *   0: NET_XMIT_SUCCESS  skb should be transmitted
- *   1: NET_XMIT_DROP     skb should be dropped and cn
- *   2: NET_XMIT_CN       skb should be transmitted and cn
- *   3: -err              skb should be dropped
- */
-#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func)	\
-	({						\
-		u32 _flags = 0;				\
-		bool _cn;				\
-		u32 _ret;				\
-		_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
-		_cn = _flags & BPF_RET_SET_CN;		\
-		if (_ret && !IS_ERR_VALUE((long)_ret))	\
-			_ret = -EFAULT;			\
-		if (!_ret)				\
-			_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);	\
-		else					\
-			_ret = (_cn ? NET_XMIT_DROP : _ret);		\
-		_ret;					\
-	})
-
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
 extern struct mutex bpf_stats_enabled_mutex;

kernel/bpf/cgroup.c

Lines changed: 107 additions & 17 deletions
@@ -22,6 +22,72 @@
 DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
 EXPORT_SYMBOL(cgroup_bpf_enabled_key);
 
+/* __always_inline is necessary to prevent indirect call through run_prog
+ * function pointer.
+ */
+static __always_inline int
+bpf_prog_run_array_cg_flags(const struct cgroup_bpf *cgrp,
+			    enum cgroup_bpf_attach_type atype,
+			    const void *ctx, bpf_prog_run_fn run_prog,
+			    int retval, u32 *ret_flags)
+{
+	const struct bpf_prog_array_item *item;
+	const struct bpf_prog *prog;
+	const struct bpf_prog_array *array;
+	struct bpf_run_ctx *old_run_ctx;
+	struct bpf_cg_run_ctx run_ctx;
+	u32 func_ret;
+
+	run_ctx.retval = retval;
+	migrate_disable();
+	rcu_read_lock();
+	array = rcu_dereference(cgrp->effective[atype]);
+	item = &array->items[0];
+	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+	while ((prog = READ_ONCE(item->prog))) {
+		run_ctx.prog_item = item;
+		func_ret = run_prog(prog, ctx);
+		if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
+			run_ctx.retval = -EPERM;
+		*(ret_flags) |= (func_ret >> 1);
+		item++;
+	}
+	bpf_reset_run_ctx(old_run_ctx);
+	rcu_read_unlock();
+	migrate_enable();
+	return run_ctx.retval;
+}
+
+static __always_inline int
+bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
+		      enum cgroup_bpf_attach_type atype,
+		      const void *ctx, bpf_prog_run_fn run_prog,
+		      int retval)
+{
+	const struct bpf_prog_array_item *item;
+	const struct bpf_prog *prog;
+	const struct bpf_prog_array *array;
+	struct bpf_run_ctx *old_run_ctx;
+	struct bpf_cg_run_ctx run_ctx;
+
+	run_ctx.retval = retval;
+	migrate_disable();
+	rcu_read_lock();
+	array = rcu_dereference(cgrp->effective[atype]);
+	item = &array->items[0];
+	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+	while ((prog = READ_ONCE(item->prog))) {
+		run_ctx.prog_item = item;
+		if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
+			run_ctx.retval = -EPERM;
+		item++;
+	}
+	bpf_reset_run_ctx(old_run_ctx);
+	rcu_read_unlock();
+	migrate_enable();
+	return run_ctx.retval;
+}
+
 void cgroup_bpf_offline(struct cgroup *cgrp)
 {
 	cgroup_get(cgrp);
@@ -1075,11 +1141,38 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	bpf_compute_and_save_data_end(skb, &saved_data_end);
 
 	if (atype == CGROUP_INET_EGRESS) {
-		ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
-			cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
+		u32 flags = 0;
+		bool cn;
+
+		ret = bpf_prog_run_array_cg_flags(
+			&cgrp->bpf, atype,
+			skb, __bpf_prog_run_save_cb, 0, &flags);
+
+		/* Return values of CGROUP EGRESS BPF programs are:
+		 *   0: drop packet
+		 *   1: keep packet
+		 *   2: drop packet and cn
+		 *   3: keep packet and cn
+		 *
+		 * The returned value is then converted to one of the NET_XMIT
+		 * or an error code that is then interpreted as drop packet
+		 * (and no cn):
+		 *   0: NET_XMIT_SUCCESS  skb should be transmitted
+		 *   1: NET_XMIT_DROP     skb should be dropped and cn
+		 *   2: NET_XMIT_CN       skb should be transmitted and cn
+		 *   3: -err              skb should be dropped
+		 */
+
+		cn = flags & BPF_RET_SET_CN;
+		if (ret && !IS_ERR_VALUE((long)ret))
+			ret = -EFAULT;
+		if (!ret)
+			ret = (cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);
+		else
+			ret = (cn ? NET_XMIT_DROP : ret);
 	} else {
-		ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
-					    __bpf_prog_run_save_cb, 0);
+		ret = bpf_prog_run_array_cg(&cgrp->bpf, atype,
+					    skb, __bpf_prog_run_save_cb, 0);
 		if (ret && !IS_ERR_VALUE((long)ret))
 			ret = -EFAULT;
 	}
@@ -1109,8 +1202,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 {
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 
-	return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
-				     bpf_prog_run, 0);
+	return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 
@@ -1155,8 +1247,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 	}
 
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
-					   bpf_prog_run, 0, flags);
+	return bpf_prog_run_array_cg_flags(&cgrp->bpf, atype,
+					   &ctx, bpf_prog_run, 0, flags);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
 
@@ -1182,8 +1274,8 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 {
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 
-	return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
-				     bpf_prog_run, 0);
+	return bpf_prog_run_array_cg(&cgrp->bpf, atype, sock_ops, bpf_prog_run,
+				     0);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
 
@@ -1200,8 +1292,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 
 	rcu_read_lock();
 	cgrp = task_dfl_cgroup(current);
-	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
-				    bpf_prog_run, 0);
+	ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0);
 	rcu_read_unlock();
 
 	return ret;
@@ -1366,8 +1457,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 
 	rcu_read_lock();
 	cgrp = task_dfl_cgroup(current);
-	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
-				    bpf_prog_run, 0);
+	ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0);
 	rcu_read_unlock();
 
 	kfree(ctx.cur_val);
@@ -1459,7 +1549,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 	}
 
 	lock_sock(sk);
-	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
+	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_SETSOCKOPT,
 				    &ctx, bpf_prog_run, 0);
 	release_sock(sk);
 
@@ -1559,7 +1649,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 	}
 
 	lock_sock(sk);
-	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
+	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
 				    &ctx, bpf_prog_run, retval);
 	release_sock(sk);
 
@@ -1608,7 +1698,7 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
 	 * be called if that data shouldn't be "exported".
 	 */
 
-	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
+	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
 				    &ctx, bpf_prog_run, retval);
 	if (ret < 0)
 		return ret;
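For contrast with the caller-managed locking used by bpf_prog_run_array(), a short sketch of how a cgroup-side hook drives the new helpers; the function name run_example_cgroup_hook is hypothetical, while the call pattern matches __cgroup_bpf_run_filter_sk() above. Because the hook passes &cgrp->bpf plus the attach type, the rcu_dereference() of effective[atype] happens inside bpf_prog_run_array_cg(), under the helper's own rcu_read_lock(), which closes the window described in the commit message:

	/* Hypothetical cgroup hook: no rcu_read_lock() at the call site;
	 * bpf_prog_run_array_cg() dereferences cgrp->bpf.effective[atype]
	 * under its own RCU read lock.
	 */
	static int run_example_cgroup_hook(struct sock *sk, void *ctx,
					   enum cgroup_bpf_attach_type atype)
	{
		struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);

		return bpf_prog_run_array_cg(&cgrp->bpf, atype, ctx,
					     bpf_prog_run, 0);
	}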

kernel/trace/bpf_trace.c

Lines changed: 4 additions & 1 deletion
@@ -129,7 +129,10 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 	 * out of events when it was updated in between this and the
 	 * rcu_dereference() which is accepted risk.
 	 */
-	ret = BPF_PROG_RUN_ARRAY(call->prog_array, ctx, bpf_prog_run);
+	rcu_read_lock();
+	ret = bpf_prog_run_array(rcu_dereference(call->prog_array),
+				 ctx, bpf_prog_run);
+	rcu_read_unlock();
 
  out:
 	__this_cpu_dec(bpf_prog_active);
