
bpf: Allow rewriting to ports under ip_unprivileged_port_start
author     Stanislav Fomichev <sdf@google.com>
           Wed, 27 Jan 2021 19:31:39 +0000 (11:31 -0800)
committer  Alexei Starovoitov <ast@kernel.org>
           Thu, 28 Jan 2021 02:18:15 +0000 (18:18 -0800)
At the moment, the BPF_CGROUP_INET{4,6}_BIND hooks can rewrite user_port
to a privileged port (< ip_unprivileged_port_start), but the bind will
then be rejected later on in __inet_bind or __inet6_bind.

Let's add another return value to indicate that the CAP_NET_BIND_SERVICE
check should be ignored. Reuse the idea we currently apply to
cgroup/egress, where bit #1 of the return code indicates CN; here, for
cgroup/bind{4,6}, bit #1 indicates that CAP_NET_BIND_SERVICE should be
bypassed.
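
For illustration, a minimal sketch of a cgroup/bind4 program that uses
the new return code. Section and helper names follow the usual libbpf
conventions; the port value is a hypothetical one below
ip_unprivileged_port_start:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("cgroup/bind4")
int bind_v4_prog(struct bpf_sock_addr *ctx)
{
	/* Rewrite the requested port to a privileged one
	 * (hypothetical example value).
	 */
	ctx->user_port = bpf_htons(111);

	/* Bit #0 allows the bind; bit #1 asks the kernel to bypass
	 * the CAP_NET_BIND_SERVICE check for it.
	 */
	return 3;
}

char _license[] SEC("license") = "GPL";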

v5:
- rename flags to be less confusing (Andrey Ignatov)
- rework BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY to work on flags
  and accept BPF_RET_SET_CN (no behavioral changes)

v4:
- Add missing IPv6 support (Martin KaFai Lau)

v3:
- Update description (Martin KaFai Lau)
- Fix capability restore in selftest (Martin KaFai Lau)

v2:
- Switch to explicit return code (Martin KaFai Lau)

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Andrey Ignatov <rdna@fb.com>
Link: https://lore.kernel.org/bpf/20210127193140.3170382-1-sdf@google.com
include/linux/bpf-cgroup.h
include/linux/bpf.h
include/net/inet_common.h
kernel/bpf/cgroup.c
kernel/bpf/verifier.c
net/ipv4/af_inet.c
net/ipv6/af_inet6.c
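
In short: bpf-cgroup.h and bpf.h add a flags-aware program-array runner
and the combined BPF_CGROUP_RUN_PROG_INET_BIND_LOCK macro, inet_common.h
adds the BIND_NO_CAP_NET_BIND_SERVICE bind flag, cgroup.c plumbs the new
flags argument through __cgroup_bpf_run_filter_sock_addr, verifier.c
widens the accepted return range for bind hooks to [0, 3], and
af_inet.c/af_inet6.c skip the CAP_NET_BIND_SERVICE check when the flag
is set.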

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 0748fd8..c42e02b 100644
@@ -125,7 +125,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
                                      struct sockaddr *uaddr,
                                      enum bpf_attach_type type,
-                                     void *t_ctx);
+                                     void *t_ctx,
+                                     u32 *flags);
 
 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
                                     struct bpf_sock_ops_kern *sock_ops,
@@ -231,30 +232,48 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 
 #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type)                                       \
 ({                                                                            \
+       u32 __unused_flags;                                                    \
        int __ret = 0;                                                         \
        if (cgroup_bpf_enabled(type))                                          \
                __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
-                                                         NULL);               \
+                                                         NULL,                \
+                                                         &__unused_flags);    \
        __ret;                                                                 \
 })
 
 #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx)                   \
 ({                                                                            \
+       u32 __unused_flags;                                                    \
        int __ret = 0;                                                         \
        if (cgroup_bpf_enabled(type))   {                                      \
                lock_sock(sk);                                                 \
                __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
-                                                         t_ctx);              \
+                                                         t_ctx,               \
+                                                         &__unused_flags);    \
                release_sock(sk);                                              \
        }                                                                      \
        __ret;                                                                 \
 })
 
-#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr)                        \
-       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
-
-#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)                        \
-       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
+/* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags
+ * via upper bits of return code. The only flag that is supported
+ * (at bit position 1) is to indicate CAP_NET_BIND_SERVICE capability check
+ * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
+ */
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, bind_flags)               \
+({                                                                            \
+       u32 __flags = 0;                                                       \
+       int __ret = 0;                                                         \
+       if (cgroup_bpf_enabled(type))   {                                      \
+               lock_sock(sk);                                                 \
+               __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
+                                                         NULL, &__flags);     \
+               release_sock(sk);                                              \
+               if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE)            \
+                       *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE;           \
+       }                                                                      \
+       __ret;                                                                 \
+})
 
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk)                                    \
        ((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) ||                      \
@@ -453,8 +472,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
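
Note on the hunk above: when a program's return code has bit #1 set,
the flags-aware runner (see the bpf.h change below) shifts it down into
__flags as BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (bit 0 of the flags
word), and the macro translates that into the socket-layer
BIND_NO_CAP_NET_BIND_SERVICE flag consumed by __inet_bind() and
__inet6_bind().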
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1aac2af..321966f 100644
@@ -1073,6 +1073,34 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                        struct bpf_prog *include_prog,
                        struct bpf_prog_array **new_array);
 
+/* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
+#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE                   (1 << 0)
+/* BPF program asks to set CN on the packet. */
+#define BPF_RET_SET_CN                                         (1 << 0)
+
+#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)          \
+       ({                                                              \
+               struct bpf_prog_array_item *_item;                      \
+               struct bpf_prog *_prog;                                 \
+               struct bpf_prog_array *_array;                          \
+               u32 _ret = 1;                                           \
+               u32 func_ret;                                           \
+               migrate_disable();                                      \
+               rcu_read_lock();                                        \
+               _array = rcu_dereference(array);                        \
+               _item = &_array->items[0];                              \
+               while ((_prog = READ_ONCE(_item->prog))) {              \
+                       bpf_cgroup_storage_set(_item->cgroup_storage);  \
+                       func_ret = func(_prog, ctx);                    \
+                       _ret &= (func_ret & 1);                         \
+                       *(ret_flags) |= (func_ret >> 1);                        \
+                       _item++;                                        \
+               }                                                       \
+               rcu_read_unlock();                                      \
+               migrate_enable();                                       \
+               _ret;                                                   \
+        })
+
 #define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \
        ({                                              \
                struct bpf_prog_array_item *_item;      \
@@ -1120,25 +1148,11 @@ _out:                                                   \
  */
 #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func)                \
        ({                                              \
-               struct bpf_prog_array_item *_item;      \
-               struct bpf_prog *_prog;                 \
-               struct bpf_prog_array *_array;          \
-               u32 ret;                                \
-               u32 _ret = 1;                           \
-               u32 _cn = 0;                            \
-               migrate_disable();                      \
-               rcu_read_lock();                        \
-               _array = rcu_dereference(array);        \
-               _item = &_array->items[0];              \
-               while ((_prog = READ_ONCE(_item->prog))) {              \
-                       bpf_cgroup_storage_set(_item->cgroup_storage);  \
-                       ret = func(_prog, ctx);         \
-                       _ret &= (ret & 1);              \
-                       _cn |= (ret & 2);               \
-                       _item++;                        \
-               }                                       \
-               rcu_read_unlock();                      \
-               migrate_enable();                       \
+               u32 _flags = 0;                         \
+               bool _cn;                               \
+               u32 _ret;                               \
+               _ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_flags); \
+               _cn = _flags & BPF_RET_SET_CN;          \
                if (_ret)                               \
                        _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);  \
                else                                    \
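
To make the verdict/flag folding in BPF_PROG_RUN_ARRAY_FLAGS concrete,
here is a small stand-alone user-space C sketch with made-up return
codes: verdict bits are AND-ed across programs, flag bits are OR-ed
after shifting off the verdict bit:

#include <stdio.h>

/* Flag value as seen in the shifted-down flags word; it corresponds
 * to bit #1 of the raw program return code.
 */
#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)

int main(void)
{
	/* Hypothetical return codes from three attached programs. */
	unsigned int rets[] = { 1, 3, 1 };
	unsigned int verdict = 1, flags = 0;

	for (int i = 0; i < 3; i++) {
		verdict &= rets[i] & 1;	/* bit #0: allow/deny */
		flags |= rets[i] >> 1;	/* upper bits: flags */
	}

	/* Prints: verdict=1 bypass=1 */
	printf("verdict=%u bypass=%u\n", verdict,
	       !!(flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE));
	return 0;
}

Any single program returning 0 denies the operation, while a flag set
by any program sticks.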
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index cb28188..cad2a61 100644
@@ -41,6 +41,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
 #define BIND_WITH_LOCK                 (1 << 1)
 /* Called from BPF program. */
 #define BIND_FROM_BPF                  (1 << 2)
+/* Skip CAP_NET_BIND_SERVICE check. */
+#define BIND_NO_CAP_NET_BIND_SERVICE   (1 << 3)
 int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
                u32 flags);
 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index da649f2..cdf3c7e 100644
@@ -1055,6 +1055,8 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
  * @uaddr: sockaddr struct provided by user
 * @type: The type of program to be executed
  * @t_ctx: Pointer to attach type specific context
+ * @flags: Pointer to u32 which contains higher bits of BPF program
+ *         return value (OR'ed together).
  *
  * socket is expected to be of type INET or INET6.
  *
@@ -1064,7 +1066,8 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
                                      struct sockaddr *uaddr,
                                      enum bpf_attach_type type,
-                                     void *t_ctx)
+                                     void *t_ctx,
+                                     u32 *flags)
 {
        struct bpf_sock_addr_kern ctx = {
                .sk = sk,
@@ -1087,7 +1090,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
        }
 
        cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
+       ret = BPF_PROG_RUN_ARRAY_FLAGS(cgrp->bpf.effective[type], &ctx,
+                                      BPF_PROG_RUN, flags);
 
        return ret == 1 ? 0 : -EPERM;
 }
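
Since __cgroup_bpf_run_filter_sock_addr now always runs through
BPF_PROG_RUN_ARRAY_FLAGS, every sock_addr hook technically surfaces the
upper return bits; only the bind{4,6} call sites consume them, and the
other macros discard them via a local __unused_flags.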
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d0eae51..972fc38 100644
@@ -7986,6 +7986,9 @@ static int check_return_code(struct bpf_verifier_env *env)
                    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
                    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
                        range = tnum_range(1, 1);
+               if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
+                   env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
+                       range = tnum_range(0, 3);
                break;
        case BPF_PROG_TYPE_CGROUP_SKB:
                if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
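
With this verifier change, cgroup/bind{4,6} programs may return any
value in [0, 3]: bit #0 is the allow/deny verdict and bit #1 requests
the capability bypass. A program ending in "return 3" both allows the
bind and skips the CAP_NET_BIND_SERVICE check; anything above 3 is
still rejected at program load time.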
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6ba2930..aaa94be 100644
@@ -438,6 +438,7 @@ EXPORT_SYMBOL(inet_release);
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
        struct sock *sk = sock->sk;
+       u32 flags = BIND_WITH_LOCK;
        int err;
 
        /* If the socket has its own bind function then use it. (RAW) */
@@ -450,11 +451,12 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        /* BPF prog is run before any checks are done so that if the prog
         * changes context in a wrong way it will be caught.
         */
-       err = BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr);
+       err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+                                                BPF_CGROUP_INET4_BIND, &flags);
        if (err)
                return err;
 
-       return __inet_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
+       return __inet_bind(sk, uaddr, addr_len, flags);
 }
 EXPORT_SYMBOL(inet_bind);
 
@@ -499,7 +501,8 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 
        snum = ntohs(addr->sin_port);
        err = -EACCES;
-       if (snum && inet_port_requires_bind_service(net, snum) &&
+       if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
+           snum && inet_port_requires_bind_service(net, snum) &&
            !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
                goto out;
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b9c6548..f091fe9 100644
@@ -295,7 +295,8 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
                return -EINVAL;
 
        snum = ntohs(addr->sin6_port);
-       if (snum && inet_port_requires_bind_service(net, snum) &&
+       if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
+           snum && inet_port_requires_bind_service(net, snum) &&
            !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
                return -EACCES;
 
@@ -439,6 +440,7 @@ out_unlock:
 int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
        struct sock *sk = sock->sk;
+       u32 flags = BIND_WITH_LOCK;
        int err = 0;
 
        /* If the socket has its own bind function then use it. */
@@ -451,11 +453,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        /* BPF prog is run before any checks are done so that if the prog
         * changes context in a wrong way it will be caught.
         */
-       err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr);
+       err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+                                                BPF_CGROUP_INET6_BIND, &flags);
        if (err)
                return err;
 
-       return __inet6_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
+       return __inet6_bind(sk, uaddr, addr_len, flags);
 }
 EXPORT_SYMBOL(inet6_bind);
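
For completeness, a hedged sketch of attaching such a program from
user space with libbpf; the cgroup path is hypothetical and prog_fd is
assumed to hold the loaded cgroup/bind4 program from the example above:

#include <fcntl.h>
#include <bpf/bpf.h>

int attach_bind4(int prog_fd)
{
	/* Hypothetical cgroup v2 path. */
	int cg_fd = open("/sys/fs/cgroup/unified/test", O_RDONLY);

	if (cg_fd < 0)
		return -1;
	/* Attach to the INET4 bind hook; flags = 0. */
	return bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET4_BIND, 0);
}

With the program attached, an unprivileged bind() from a task in that
cgroup gets its port rewritten and, thanks to the new return code, no
longer fails with -EACCES.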