OSDN Git Service

drop_monitor: remove quadratic behavior
authorEric Dumazet <edumazet@google.com>
Tue, 22 Feb 2022 22:04:50 +0000 (14:04 -0800)
committerDavid S. Miller <davem@davemloft.net>
Wed, 23 Feb 2022 12:39:58 +0000 (12:39 +0000)
drop_monitor is using an unique list on which all netdevices in
the host have an element, regardless of their netns.

This scales poorly, not only at device unregister time (what I
caught during my netns dismantle stress tests), but also at packet
processing time whenever trace_napi_poll_hit() is called.

If the intent was to avoid adding one pointer in 'struct net_device'
then surely we prefer O(1) behavior.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/netdevice.h
net/core/drop_monitor.c

index 93fc680..c79ee22 100644 (file)
@@ -2236,7 +2236,9 @@ struct net_device {
 #if IS_ENABLED(CONFIG_MRP)
        struct mrp_port __rcu   *mrp_port;
 #endif
-
+#if IS_ENABLED(CONFIG_NET_DROP_MONITOR)
+       struct dm_hw_stat_delta __rcu *dm_private;
+#endif
        struct device           dev;
        const struct attribute_group *sysfs_groups[4];
        const struct attribute_group *sysfs_rx_queue_group;
index d7d177f..b89e3e9 100644 (file)
@@ -64,7 +64,6 @@ static const char * const drop_reasons[] = {
 /* net_dm_mutex
  *
  * An overall lock guarding every operation coming from userspace.
- * It also guards the global 'hw_stats_list' list.
  */
 static DEFINE_MUTEX(net_dm_mutex);
 
@@ -100,11 +99,9 @@ struct per_cpu_dm_data {
 };
 
 struct dm_hw_stat_delta {
-       struct net_device *dev;
        unsigned long last_rx;
-       struct list_head list;
-       struct rcu_head rcu;
        unsigned long last_drop_val;
+       struct rcu_head rcu;
 };
 
 static struct genl_family net_drop_monitor_family;
@@ -115,7 +112,6 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
 static int dm_hit_limit = 64;
 static int dm_delay = 1;
 static unsigned long dm_hw_check_delta = 2*HZ;
-static LIST_HEAD(hw_stats_list);
 
 static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
 static u32 net_dm_trunc_len;
@@ -287,33 +283,27 @@ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
 static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
                                int work, int budget)
 {
-       struct dm_hw_stat_delta *new_stat;
-
+       struct net_device *dev = napi->dev;
+       struct dm_hw_stat_delta *stat;
        /*
         * Don't check napi structures with no associated device
         */
-       if (!napi->dev)
+       if (!dev)
                return;
 
        rcu_read_lock();
-       list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
-               struct net_device *dev;
-
+       stat = rcu_dereference(dev->dm_private);
+       if (stat) {
                /*
                 * only add a note to our monitor buffer if:
-                * 1) this is the dev we received on
-                * 2) its after the last_rx delta
-                * 3) our rx_dropped count has gone up
+                * 1) its after the last_rx delta
+                * 2) our rx_dropped count has gone up
                 */
-               /* Paired with WRITE_ONCE() in dropmon_net_event() */
-               dev = READ_ONCE(new_stat->dev);
-               if ((dev == napi->dev)  &&
-                   (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
-                   (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
+               if (time_after(jiffies, stat->last_rx + dm_hw_check_delta) &&
+                   (dev->stats.rx_dropped != stat->last_drop_val)) {
                        trace_drop_common(NULL, NULL);
-                       new_stat->last_drop_val = napi->dev->stats.rx_dropped;
-                       new_stat->last_rx = jiffies;
-                       break;
+                       stat->last_drop_val = dev->stats.rx_dropped;
+                       stat->last_rx = jiffies;
                }
        }
        rcu_read_unlock();
@@ -1198,7 +1188,6 @@ err_module_put:
 
 static void net_dm_trace_off_set(void)
 {
-       struct dm_hw_stat_delta *new_stat, *temp;
        const struct net_dm_alert_ops *ops;
        int cpu;
 
@@ -1222,13 +1211,6 @@ static void net_dm_trace_off_set(void)
                        consume_skb(skb);
        }
 
-       list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
-               if (new_stat->dev == NULL) {
-                       list_del_rcu(&new_stat->list);
-                       kfree_rcu(new_stat, rcu);
-               }
-       }
-
        module_put(THIS_MODULE);
 }
 
@@ -1589,41 +1571,28 @@ static int dropmon_net_event(struct notifier_block *ev_block,
                             unsigned long event, void *ptr)
 {
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-       struct dm_hw_stat_delta *new_stat = NULL;
-       struct dm_hw_stat_delta *tmp;
+       struct dm_hw_stat_delta *stat;
 
        switch (event) {
        case NETDEV_REGISTER:
-               new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
+               if (WARN_ON_ONCE(rtnl_dereference(dev->dm_private)))
+                       break;
+               stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+               if (!stat)
+                       break;
 
-               if (!new_stat)
-                       goto out;
+               stat->last_rx = jiffies;
+               rcu_assign_pointer(dev->dm_private, stat);
 
-               new_stat->dev = dev;
-               new_stat->last_rx = jiffies;
-               mutex_lock(&net_dm_mutex);
-               list_add_rcu(&new_stat->list, &hw_stats_list);
-               mutex_unlock(&net_dm_mutex);
                break;
        case NETDEV_UNREGISTER:
-               mutex_lock(&net_dm_mutex);
-               list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
-                       if (new_stat->dev == dev) {
-
-                               /* Paired with READ_ONCE() in trace_napi_poll_hit() */
-                               WRITE_ONCE(new_stat->dev, NULL);
-
-                               if (trace_state == TRACE_OFF) {
-                                       list_del_rcu(&new_stat->list);
-                                       kfree_rcu(new_stat, rcu);
-                                       break;
-                               }
-                       }
+               stat = rtnl_dereference(dev->dm_private);
+               if (stat) {
+                       rcu_assign_pointer(dev->dm_private, NULL);
+                       kfree_rcu(stat, rcu);
                }
-               mutex_unlock(&net_dm_mutex);
                break;
        }
-out:
        return NOTIFY_DONE;
 }