Skip to content

Commit 0838aa7

Browse files
committed
netfilter: fix netns dependencies with conntrack templates
Quoting Daniel Borkmann: "When adding connection tracking template rules to a netns, f.e. to configure netfilter zones, the kernel will endlessly busy-loop as soon as we try to delete the given netns in case there's at least one template present, which is problematic i.e. if there is such bravery that the priviledged user inside the netns is assumed untrusted. Minimal example: ip netns add foo ip netns exec foo iptables -t raw -A PREROUTING -d 1.2.3.4 -j CT --zone 1 ip netns del foo What happens is that when nf_ct_iterate_cleanup() is being called from nf_conntrack_cleanup_net_list() for a provided netns, we always end up with a net->ct.count > 0 and thus jump back to i_see_dead_people. We don't get a soft-lockup as we still have a schedule() point, but the serving CPU spins on 100% from that point onwards. Since templates are normally allocated with nf_conntrack_alloc(), we also bump net->ct.count. The issue why they are not yet nf_ct_put() is because the per netns .exit() handler from x_tables (which would eventually invoke xt_CT's xt_ct_tg_destroy() that drops reference on info->ct) is called in the dependency chain at a *later* point in time than the per netns .exit() handler for the connection tracker. This is clearly a chicken'n'egg problem: after the connection tracker .exit() handler, we've teared down all the connection tracking infrastructure already, so rightfully, xt_ct_tg_destroy() cannot be invoked at a later point in time during the netns cleanup, as that would lead to a use-after-free. At the same time, we cannot make x_tables depend on the connection tracker module, so that the xt_ct_tg_destroy() would be invoked earlier in the cleanup chain." Daniel confirms this has to do with the order in which modules are loaded or having compiled nf_conntrack as modules while x_tables built-in. So we have no guarantees regarding the order in which netns callbacks are executed. Fix this by allocating the templates through kmalloc() from the respective SYNPROXY and CT targets, so they don't depend on the conntrack kmem cache. Then, release then via nf_ct_tmpl_free() from destroy_conntrack(). This branch is marked as unlikely since conntrack templates are rarely allocated and only from the configuration plane path. Note that templates are not kept in any list to avoid further dependencies with nf_conntrack anymore, thus, the tmpl larval list is removed. Reported-by: Daniel Borkmann <[email protected]> Signed-off-by: Pablo Neira Ayuso <[email protected]> Tested-by: Daniel Borkmann <[email protected]>
1 parent 484836e commit 0838aa7

File tree

5 files changed

+51
-34
lines changed

5 files changed

+51
-34
lines changed

include/net/netfilter/nf_conntrack.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ extern unsigned int nf_conntrack_max;
291291
extern unsigned int nf_conntrack_hash_rnd;
292292
void init_nf_conntrack_hash_rnd(void);
293293

294-
void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl);
294+
struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags);
295295

296296
#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count)
297297
#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)

include/net/netns/conntrack.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ struct ct_pcpu {
6868
spinlock_t lock;
6969
struct hlist_nulls_head unconfirmed;
7070
struct hlist_nulls_head dying;
71-
struct hlist_nulls_head tmpl;
7271
};
7372

7473
struct netns_ct {

net/netfilter/nf_conntrack_core.c

Lines changed: 44 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,46 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
287287
spin_unlock(&pcpu->lock);
288288
}
289289

290+
/* Released via destroy_conntrack() */
291+
struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags)
292+
{
293+
struct nf_conn *tmpl;
294+
295+
tmpl = kzalloc(sizeof(struct nf_conn), GFP_KERNEL);
296+
if (tmpl == NULL)
297+
return NULL;
298+
299+
tmpl->status = IPS_TEMPLATE;
300+
write_pnet(&tmpl->ct_net, net);
301+
302+
#ifdef CONFIG_NF_CONNTRACK_ZONES
303+
if (zone) {
304+
struct nf_conntrack_zone *nf_ct_zone;
305+
306+
nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, GFP_ATOMIC);
307+
if (!nf_ct_zone)
308+
goto out_free;
309+
nf_ct_zone->id = zone;
310+
}
311+
#endif
312+
atomic_set(&tmpl->ct_general.use, 0);
313+
314+
return tmpl;
315+
#ifdef CONFIG_NF_CONNTRACK_ZONES
316+
out_free:
317+
kfree(tmpl);
318+
return NULL;
319+
#endif
320+
}
321+
EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
322+
323+
static void nf_ct_tmpl_free(struct nf_conn *tmpl)
324+
{
325+
nf_ct_ext_destroy(tmpl);
326+
nf_ct_ext_free(tmpl);
327+
kfree(tmpl);
328+
}
329+
290330
static void
291331
destroy_conntrack(struct nf_conntrack *nfct)
292332
{
@@ -298,6 +338,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
298338
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
299339
NF_CT_ASSERT(!timer_pending(&ct->timeout));
300340

341+
if (unlikely(nf_ct_is_template(ct))) {
342+
nf_ct_tmpl_free(ct);
343+
return;
344+
}
301345
rcu_read_lock();
302346
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
303347
if (l4proto && l4proto->destroy)
@@ -540,28 +584,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
540584
}
541585
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
542586

543-
/* deletion from this larval template list happens via nf_ct_put() */
544-
void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
545-
{
546-
struct ct_pcpu *pcpu;
547-
548-
__set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
549-
__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
550-
nf_conntrack_get(&tmpl->ct_general);
551-
552-
/* add this conntrack to the (per cpu) tmpl list */
553-
local_bh_disable();
554-
tmpl->cpu = smp_processor_id();
555-
pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu);
556-
557-
spin_lock(&pcpu->lock);
558-
/* Overload tuple linked list to put us in template list. */
559-
hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
560-
&pcpu->tmpl);
561-
spin_unlock_bh(&pcpu->lock);
562-
}
563-
EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
564-
565587
/* Confirm a connection given skb; places it in hash table */
566588
int
567589
__nf_conntrack_confirm(struct sk_buff *skb)
@@ -1751,7 +1773,6 @@ int nf_conntrack_init_net(struct net *net)
17511773
spin_lock_init(&pcpu->lock);
17521774
INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
17531775
INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
1754-
INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL);
17551776
}
17561777

17571778
net->ct.stat = alloc_percpu(struct ip_conntrack_stat);

net/netfilter/nf_synproxy_core.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -349,12 +349,10 @@ static void __net_exit synproxy_proc_exit(struct net *net)
349349
static int __net_init synproxy_net_init(struct net *net)
350350
{
351351
struct synproxy_net *snet = synproxy_pernet(net);
352-
struct nf_conntrack_tuple t;
353352
struct nf_conn *ct;
354353
int err = -ENOMEM;
355354

356-
memset(&t, 0, sizeof(t));
357-
ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL);
355+
ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL);
358356
if (IS_ERR(ct)) {
359357
err = PTR_ERR(ct);
360358
goto err1;
@@ -365,7 +363,8 @@ static int __net_init synproxy_net_init(struct net *net)
365363
if (!nfct_synproxy_ext_add(ct))
366364
goto err2;
367365

368-
nf_conntrack_tmpl_insert(net, ct);
366+
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
367+
nf_conntrack_get(&ct->ct_general);
369368
snet->tmpl = ct;
370369

371370
snet->stats = alloc_percpu(struct synproxy_stats);

net/netfilter/xt_CT.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,6 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
184184
static int xt_ct_tg_check(const struct xt_tgchk_param *par,
185185
struct xt_ct_target_info_v1 *info)
186186
{
187-
struct nf_conntrack_tuple t;
188187
struct nf_conn *ct;
189188
int ret = -EOPNOTSUPP;
190189

@@ -202,8 +201,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
202201
if (ret < 0)
203202
goto err1;
204203

205-
memset(&t, 0, sizeof(t));
206-
ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL);
204+
ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL);
207205
ret = PTR_ERR(ct);
208206
if (IS_ERR(ct))
209207
goto err2;
@@ -227,8 +225,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
227225
if (ret < 0)
228226
goto err3;
229227
}
230-
231-
nf_conntrack_tmpl_insert(par->net, ct);
228+
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
229+
nf_conntrack_get(&ct->ct_general);
232230
out:
233231
info->ct = ct;
234232
return 0;

0 commit comments

Comments
 (0)