OSDN Git Service

netfilter: nfnetlink: re-enable conntrack expectation events
author: Florian Westphal <fw@strlen.de>
Fri, 5 Aug 2022 08:59:57 +0000 (10:59 +0200)
committer: Florian Westphal <fw@strlen.de>
Thu, 11 Aug 2022 16:09:54 +0000 (18:09 +0200)
To avoid allocation of the conntrack extension area when possible,
the default behaviour was changed to only allocate the event extension
if a userspace program is subscribed to a notification group.

The problem is that while 'conntrack -E' does enable the event allocation
behind the scenes, 'conntrack -E expect' does not: no expectation events
are delivered unless the user sets
"net.netfilter.nf_conntrack_events" back to 1 (always on).

Fix the autodetection to also consider the EXP type groups.

We need to track the 6 event groups (3+3, new/update/destroy for events and
for expectations each) independently, else we'd disable events again
if an expectation group becomes empty while there is still an active
event group.

Fixes: 2794cdb0b97b ("netfilter: nfnetlink: allow to detect if ctnetlink listeners exist")
Reported-by: Yi Chen <yiche@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
include/net/netns/conntrack.h
net/netfilter/nfnetlink.c

index 0677cd3..c396a38 100644 (file)
@@ -95,7 +95,7 @@ struct nf_ip_net {
 
 struct netns_ct {
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-       bool ctnetlink_has_listener;
+       u8 ctnetlink_has_listener;
        bool ecache_dwork_pending;
 #endif
        u8                      sysctl_log_invalid; /* Log invalid packets */
index c24b124..9c44518 100644 (file)
@@ -44,6 +44,10 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket");
 
 static unsigned int nfnetlink_pernet_id __read_mostly;
 
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static DEFINE_SPINLOCK(nfnl_grp_active_lock);
+#endif
+
 struct nfnl_net {
        struct sock *nfnl;
 };
@@ -654,6 +658,44 @@ static void nfnetlink_rcv(struct sk_buff *skb)
                netlink_rcv_skb(skb, nfnetlink_rcv_msg);
 }
 
+static void nfnetlink_bind_event(struct net *net, unsigned int group)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+       int type, group_bit;
+       u8 v;
+
+       /* All NFNLGRP_CONNTRACK_* group bits fit into u8.
+        * The other groups are not relevant and can be ignored.
+        */
+       if (group >= 8)
+               return;
+
+       type = nfnl_group2type[group];
+
+       switch (type) {
+       case NFNL_SUBSYS_CTNETLINK:
+               break;
+       case NFNL_SUBSYS_CTNETLINK_EXP:
+               break;
+       default:
+               return;
+       }
+
+       group_bit = (1 << group);
+
+       spin_lock(&nfnl_grp_active_lock);
+       v = READ_ONCE(net->ct.ctnetlink_has_listener);
+       if ((v & group_bit) == 0) {
+               v |= group_bit;
+
+               /* read concurrently without nfnl_grp_active_lock held. */
+               WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
+       }
+
+       spin_unlock(&nfnl_grp_active_lock);
+#endif
+}
+
 static int nfnetlink_bind(struct net *net, int group)
 {
        const struct nfnetlink_subsystem *ss;
@@ -670,28 +712,45 @@ static int nfnetlink_bind(struct net *net, int group)
        if (!ss)
                request_module_nowait("nfnetlink-subsys-%d", type);
 
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-       if (type == NFNL_SUBSYS_CTNETLINK) {
-               nfnl_lock(NFNL_SUBSYS_CTNETLINK);
-               WRITE_ONCE(net->ct.ctnetlink_has_listener, true);
-               nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
-       }
-#endif
+       nfnetlink_bind_event(net, group);
        return 0;
 }
 
 static void nfnetlink_unbind(struct net *net, int group)
 {
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
+       int type, group_bit;
+
        if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX)
                return;
 
-       if (nfnl_group2type[group] == NFNL_SUBSYS_CTNETLINK) {
-               nfnl_lock(NFNL_SUBSYS_CTNETLINK);
-               if (!nfnetlink_has_listeners(net, group))
-                       WRITE_ONCE(net->ct.ctnetlink_has_listener, false);
-               nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
+       type = nfnl_group2type[group];
+
+       switch (type) {
+       case NFNL_SUBSYS_CTNETLINK:
+               break;
+       case NFNL_SUBSYS_CTNETLINK_EXP:
+               break;
+       default:
+               return;
+       }
+
+       /* ctnetlink_has_listener is u8 */
+       if (group >= 8)
+               return;
+
+       group_bit = (1 << group);
+
+       spin_lock(&nfnl_grp_active_lock);
+       if (!nfnetlink_has_listeners(net, group)) {
+               u8 v = READ_ONCE(net->ct.ctnetlink_has_listener);
+
+               v &= ~group_bit;
+
+               /* read concurrently without nfnl_grp_active_lock held. */
+               WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
        }
+       spin_unlock(&nfnl_grp_active_lock);
 #endif
 }