OSDN Git Service

net: Add SO_BUSY_POLL_BUDGET socket option
authorBjörn Töpel <bjorn.topel@intel.com>
Mon, 30 Nov 2020 18:51:57 +0000 (19:51 +0100)
committerDaniel Borkmann <daniel@iogearbox.net>
Mon, 30 Nov 2020 23:09:25 +0000 (00:09 +0100)
This option lets a user set a per socket NAPI budget for
busy-polling. If the option is not set, it will use the default of 8.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/bpf/20201130185205.196029-3-bjorn.topel@gmail.com
arch/alpha/include/uapi/asm/socket.h
arch/mips/include/uapi/asm/socket.h
arch/parisc/include/uapi/asm/socket.h
arch/sparc/include/uapi/asm/socket.h
fs/eventpoll.c
include/net/busy_poll.h
include/net/sock.h
include/uapi/asm-generic/socket.h
net/core/dev.c
net/core/sock.c

index 5383596..5742035 100644 (file)
 #define SO_DETACH_REUSEPORT_BPF 68
 
 #define SO_PREFER_BUSY_POLL    69
+#define SO_BUSY_POLL_BUDGET    70
 
 #if !defined(__KERNEL__)
 
index e406e73..2d94996 100644 (file)
 #define SO_DETACH_REUSEPORT_BPF 68
 
 #define SO_PREFER_BUSY_POLL    69
+#define SO_BUSY_POLL_BUDGET    70
 
 #if !defined(__KERNEL__)
 
index 1bc4620..f609043 100644 (file)
 #define SO_DETACH_REUSEPORT_BPF 0x4042
 
 #define SO_PREFER_BUSY_POLL    0x4043
+#define SO_BUSY_POLL_BUDGET    0x4044
 
 #if !defined(__KERNEL__)
 
index 99688cf..848a22f 100644 (file)
 #define SO_DETACH_REUSEPORT_BPF  0x0047
 
 #define SO_PREFER_BUSY_POLL     0x0048
+#define SO_BUSY_POLL_BUDGET     0x0049
 
 #if !defined(__KERNEL__)
 
index e11fab3..73c346e 100644 (file)
@@ -397,7 +397,8 @@ static void ep_busy_loop(struct eventpoll *ep, int nonblock)
        unsigned int napi_id = READ_ONCE(ep->napi_id);
 
        if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on())
-               napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, false);
+               napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, false,
+                              BUSY_POLL_BUDGET);
 }
 
 static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
index 0292b83..2f8f518 100644 (file)
@@ -23,6 +23,8 @@
  */
 #define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))
 
+#define BUSY_POLL_BUDGET 8
+
 #ifdef CONFIG_NET_RX_BUSY_POLL
 
 struct napi_struct;
@@ -43,7 +45,7 @@ bool sk_busy_loop_end(void *p, unsigned long start_time);
 
 void napi_busy_loop(unsigned int napi_id,
                    bool (*loop_end)(void *, unsigned long),
-                   void *loop_end_arg, bool prefer_busy_poll);
+                   void *loop_end_arg, bool prefer_busy_poll, u16 budget);
 
 #else /* CONFIG_NET_RX_BUSY_POLL */
 static inline unsigned long net_busy_loop_on(void)
@@ -106,7 +108,8 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
 
        if (napi_id >= MIN_NAPI_ID)
                napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk,
-                              READ_ONCE(sk->sk_prefer_busy_poll));
+                              READ_ONCE(sk->sk_prefer_busy_poll),
+                              READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET);
 #endif
 }
 
index d49b89b..77ba2c2 100644 (file)
@@ -302,6 +302,7 @@ struct bpf_local_storage;
   *    @sk_max_ack_backlog: listen backlog set in listen()
   *    @sk_uid: user id of owner
   *    @sk_prefer_busy_poll: prefer busypolling over softirq processing
+  *    @sk_busy_poll_budget: napi processing budget when busypolling
   *    @sk_priority: %SO_PRIORITY setting
   *    @sk_type: socket type (%SOCK_STREAM, etc)
   *    @sk_protocol: which protocol this socket belongs in this network family
@@ -482,6 +483,7 @@ struct sock {
        kuid_t                  sk_uid;
 #ifdef CONFIG_NET_RX_BUSY_POLL
        u8                      sk_prefer_busy_poll;
+       u16                     sk_busy_poll_budget;
 #endif
        struct pid              *sk_peer_pid;
        const struct cred       *sk_peer_cred;
index 7dd0240..4dcd13d 100644 (file)
 #define SO_DETACH_REUSEPORT_BPF 68
 
 #define SO_PREFER_BUSY_POLL    69
+#define SO_BUSY_POLL_BUDGET    70
 
 #if !defined(__KERNEL__)
 
index 6f8d2cf..7a1e593 100644 (file)
@@ -6496,8 +6496,6 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
 
 #if defined(CONFIG_NET_RX_BUSY_POLL)
 
-#define BUSY_POLL_BUDGET 8
-
 static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
 {
        if (!skip_schedule) {
@@ -6517,7 +6515,8 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
        clear_bit(NAPI_STATE_SCHED, &napi->state);
 }
 
-static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll)
+static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll,
+                          u16 budget)
 {
        bool skip_schedule = false;
        unsigned long timeout;
@@ -6549,21 +6548,21 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool
        /* All we really want here is to re-enable device interrupts.
         * Ideally, a new ndo_busy_poll_stop() could avoid another round.
         */
-       rc = napi->poll(napi, BUSY_POLL_BUDGET);
+       rc = napi->poll(napi, budget);
        /* We can't gro_normal_list() here, because napi->poll() might have
         * rearmed the napi (napi_complete_done()) in which case it could
         * already be running on another CPU.
         */
-       trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
+       trace_napi_poll(napi, rc, budget);
        netpoll_poll_unlock(have_poll_lock);
-       if (rc == BUSY_POLL_BUDGET)
+       if (rc == budget)
                __busy_poll_stop(napi, skip_schedule);
        local_bh_enable();
 }
 
 void napi_busy_loop(unsigned int napi_id,
                    bool (*loop_end)(void *, unsigned long),
-                   void *loop_end_arg, bool prefer_busy_poll)
+                   void *loop_end_arg, bool prefer_busy_poll, u16 budget)
 {
        unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
        int (*napi_poll)(struct napi_struct *napi, int budget);
@@ -6606,8 +6605,8 @@ restart:
                        have_poll_lock = netpoll_poll_lock(napi);
                        napi_poll = napi->poll;
                }
-               work = napi_poll(napi, BUSY_POLL_BUDGET);
-               trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
+               work = napi_poll(napi, budget);
+               trace_napi_poll(napi, work, budget);
                gro_normal_list(napi);
 count:
                if (work > 0)
@@ -6620,7 +6619,7 @@ count:
 
                if (unlikely(need_resched())) {
                        if (napi_poll)
-                               busy_poll_stop(napi, have_poll_lock, prefer_busy_poll);
+                               busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
                        preempt_enable();
                        rcu_read_unlock();
                        cond_resched();
@@ -6631,7 +6630,7 @@ count:
                cpu_relax();
        }
        if (napi_poll)
-               busy_poll_stop(napi, have_poll_lock, prefer_busy_poll);
+               busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
        preempt_enable();
 out:
        rcu_read_unlock();
index e05f2e5..d422a68 100644 (file)
@@ -1165,6 +1165,16 @@ set_sndbuf:
                else
                        WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
                break;
+       case SO_BUSY_POLL_BUDGET:
+               if (val > READ_ONCE(sk->sk_busy_poll_budget) && !capable(CAP_NET_ADMIN)) {
+                       ret = -EPERM;
+               } else {
+                       if (val < 0 || val > U16_MAX)
+                               ret = -EINVAL;
+                       else
+                               WRITE_ONCE(sk->sk_busy_poll_budget, val);
+               }
+               break;
 #endif
 
        case SO_MAX_PACING_RATE: