
bpf: Allow rewriting to ports under ip_unprivileged_port_start
author     Stanislav Fomichev <sdf@google.com>
           Wed, 27 Jan 2021 19:31:39 +0000 (11:31 -0800)
committer  Alexei Starovoitov <ast@kernel.org>
           Thu, 28 Jan 2021 02:18:15 +0000 (18:18 -0800)
At the moment, the BPF_CGROUP_INET{4,6}_BIND hooks can rewrite user_port
to a privileged port (< ip_unprivileged_port_start), but the bind will
then be rejected later on in __inet_bind or __inet6_bind.

Let's add another return value to indicate that the CAP_NET_BIND_SERVICE
check should be ignored. Reuse the idea we currently apply to
cgroup/egress, where bit #1 of the return code indicates CN; here, for
cgroup/bind{4,6}, bit #1 indicates that CAP_NET_BIND_SERVICE should be
bypassed.
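
For illustration, a minimal sketch of a cgroup/bind4 program that uses
the new return code. Section and helper names follow the usual libbpf
conventions; the port value is a hypothetical one below
ip_unprivileged_port_start:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("cgroup/bind4")
int bind_v4_prog(struct bpf_sock_addr *ctx)
{
	/* Rewrite the requested port to a privileged one
	 * (hypothetical example value).
	 */
	ctx->user_port = bpf_htons(111);

	/* Bit #0 allows the bind; bit #1 asks the kernel to bypass
	 * the CAP_NET_BIND_SERVICE check for it.
	 */
	return 3;
}

char _license[] SEC("license") = "GPL";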

v5:
- rename flags to be less confusing (Andrey Ignatov)
- rework BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY to work on flags
  and accept BPF_RET_SET_CN (no behavioral changes)

v4:
- Add missing IPv6 support (Martin KaFai Lau)

v3:
- Update description (Martin KaFai Lau)
- Fix capability restore in selftest (Martin KaFai Lau)

v2:
- Switch to explicit return code (Martin KaFai Lau)

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Andrey Ignatov <rdna@fb.com>
Link: https://lore.kernel.org/bpf/20210127193140.3170382-1-sdf@google.com
include/linux/bpf-cgroup.h
include/linux/bpf.h
include/net/inet_common.h
kernel/bpf/cgroup.c
kernel/bpf/verifier.c
net/ipv4/af_inet.c
net/ipv6/af_inet6.c
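
In short: bpf-cgroup.h and bpf.h add a flags-aware program-array runner
and the combined BPF_CGROUP_RUN_PROG_INET_BIND_LOCK macro, inet_common.h
adds the BIND_NO_CAP_NET_BIND_SERVICE bind flag, cgroup.c plumbs the new
flags argument through __cgroup_bpf_run_filter_sock_addr, verifier.c
widens the accepted return range for bind hooks to [0, 3], and
af_inet.c/af_inet6.c skip the CAP_NET_BIND_SERVICE check when the flag
is set.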

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 0748fd8..c42e02b 100644
@@ -125,7 +125,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
                                      struct sockaddr *uaddr,
                                      enum bpf_attach_type type,
-                                     void *t_ctx);
+                                     void *t_ctx,
+                                     u32 *flags);
 
 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
                                     struct bpf_sock_ops_kern *sock_ops,
@@ -231,30 +232,48 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 
 #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type)                                       \
 ({                                                                            \
+       u32 __unused_flags;                                                    \
        int __ret = 0;                                                         \
        if (cgroup_bpf_enabled(type))                                          \
                __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
-                                                         NULL);               \
+                                                         NULL,                \
+                                                         &__unused_flags);    \
        __ret;                                                                 \
 })
 
 #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx)                   \
 ({                                                                            \
+       u32 __unused_flags;                                                    \
        int __ret = 0;                                                         \
        if (cgroup_bpf_enabled(type))   {                                      \
                lock_sock(sk);                                                 \
                __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
-                                                         t_ctx);              \
+                                                         t_ctx,               \
+                                                         &__unused_flags);    \
                release_sock(sk);                                              \
        }                                                                      \
        __ret;                                                                 \
 })
 
-#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr)                        \
-       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
-
-#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)                        \
-       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
+/* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags
+ * via upper bits of return code. The only flag that is supported
+ * (at bit position 1) is to indicate CAP_NET_BIND_SERVICE capability check
+ * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
+ */
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, bind_flags)               \
+({                                                                            \
+       u32 __flags = 0;                                                       \
+       int __ret = 0;                                                         \
+       if (cgroup_bpf_enabled(type))   {                                      \
+               lock_sock(sk);                                                 \
+               __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
+                                                         NULL, &__flags);     \
+               release_sock(sk);                                              \
+               if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE)            \
+                       *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE;           \
+       }                                                                      \
+       __ret;                                                                 \
+})
 
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk)                                    \
        ((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) ||                      \
@@ -453,8 +472,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
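
Note on the hunk above: when a program's return code has bit #1 set,
the flags-aware runner (see the bpf.h change below) shifts it down into
__flags as BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (bit 0 of the flags
word), and the macro translates that into the socket-layer
BIND_NO_CAP_NET_BIND_SERVICE flag consumed by __inet_bind() and
__inet6_bind().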
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1aac2af..321966f 100644
@@ -1073,6 +1073,34 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                        struct bpf_prog *include_prog,
                        struct bpf_prog_array **new_array);
 
+/* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
+#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE                   (1 << 0)
+/* BPF program asks to set CN on the packet. */
+#define BPF_RET_SET_CN                                         (1 << 0)
+
+#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)          \
+       ({                                                              \
+               struct bpf_prog_array_item *_item;                      \
+               struct bpf_prog *_prog;                                 \
+               struct bpf_prog_array *_array;                          \
+               u32 _ret = 1;                                           \
+               u32 func_ret;                                           \
+               migrate_disable();                                      \
+               rcu_read_lock();                                        \
+               _array = rcu_dereference(array);                        \
+               _item = &_array->items[0];                              \
+               while ((_prog = READ_ONCE(_item->prog))) {              \
+                       bpf_cgroup_storage_set(_item->cgroup_storage);  \
+                       func_ret = func(_prog, ctx);                    \
+                       _ret &= (func_ret & 1);                         \
+                       *(ret_flags) |= (func_ret >> 1);                        \
+                       _item++;                                        \
+               }                                                       \
+               rcu_read_unlock();                                      \
+               migrate_enable();                                       \
+               _ret;                                                   \
+        })
+
 #define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \
        ({                                              \
                struct bpf_prog_array_item *_item;      \
@@ -1120,25 +1148,11 @@ _out:                                                   \
  */
 #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func)                \
        ({                                              \
-               struct bpf_prog_array_item *_item;      \
-               struct bpf_prog *_prog;                 \
-               struct bpf_prog_array *_array;          \
-               u32 ret;                                \
-               u32 _ret = 1;                           \
-               u32 _cn = 0;                            \
-               migrate_disable();                      \
-               rcu_read_lock();                        \
-               _array = rcu_dereference(array);        \
-               _item = &_array->items[0];              \
-               while ((_prog = READ_ONCE(_item->prog))) {              \
-                       bpf_cgroup_storage_set(_item->cgroup_storage);  \
-                       ret = func(_prog, ctx);         \
-                       _ret &= (ret & 1);              \
-                       _cn |= (ret & 2);               \
-                       _item++;                        \
-               }                                       \
-               rcu_read_unlock();                      \
-               migrate_enable();                       \
+               u32 _flags = 0;                         \
+               bool _cn;                               \
+               u32 _ret;                               \
+               _ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_flags); \
+               _cn = _flags & BPF_RET_SET_CN;          \
                if (_ret)                               \
                        _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);  \
                else                                    \
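
To make the verdict/flag folding in BPF_PROG_RUN_ARRAY_FLAGS concrete,
here is a small stand-alone user-space C sketch with made-up return
codes: verdict bits are AND-ed across programs, flag bits are OR-ed
after shifting off the verdict bit:

#include <stdio.h>

/* Flag value as seen in the shifted-down flags word; it corresponds
 * to bit #1 of the raw program return code.
 */
#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)

int main(void)
{
	/* Hypothetical return codes from three attached programs. */
	unsigned int rets[] = { 1, 3, 1 };
	unsigned int verdict = 1, flags = 0;

	for (int i = 0; i < 3; i++) {
		verdict &= rets[i] & 1;	/* bit #0: allow/deny */
		flags |= rets[i] >> 1;	/* upper bits: flags */
	}

	/* Prints: verdict=1 bypass=1 */
	printf("verdict=%u bypass=%u\n", verdict,
	       !!(flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE));
	return 0;
}

Any single program returning 0 denies the operation, while a flag set
by any program sticks.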
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index cb28188..cad2a61 100644
@@ -41,6 +41,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
 #define BIND_WITH_LOCK                 (1 << 1)
 /* Called from BPF program. */
 #define BIND_FROM_BPF                  (1 << 2)
+/* Skip CAP_NET_BIND_SERVICE check. */
+#define BIND_NO_CAP_NET_BIND_SERVICE   (1 << 3)
 int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
                u32 flags);
 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index da649f2..cdf3c7e 100644
@@ -1055,6 +1055,8 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
  * @uaddr: sockaddr struct provided by user
 * @type: The type of program to be executed
  * @t_ctx: Pointer to attach type specific context
+ * @flags: Pointer to u32 which contains higher bits of BPF program
+ *         return value (OR'ed together).
  *
  * socket is expected to be of type INET or INET6.
  *
@@ -1064,7 +1066,8 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
                                      struct sockaddr *uaddr,
                                      enum bpf_attach_type type,
-                                     void *t_ctx)
+                                     void *t_ctx,
+                                     u32 *flags)
 {
        struct bpf_sock_addr_kern ctx = {
                .sk = sk,
@@ -1087,7 +1090,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
        }
 
        cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
+       ret = BPF_PROG_RUN_ARRAY_FLAGS(cgrp->bpf.effective[type], &ctx,
+                                      BPF_PROG_RUN, flags);
 
        return ret == 1 ? 0 : -EPERM;
 }
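
Since __cgroup_bpf_run_filter_sock_addr now always runs through
BPF_PROG_RUN_ARRAY_FLAGS, every sock_addr hook technically surfaces the
upper return bits; only the bind{4,6} call sites consume them, and the
other macros discard them via a local __unused_flags.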
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d0eae51..972fc38 100644
@@ -7986,6 +7986,9 @@ static int check_return_code(struct bpf_verifier_env *env)
                    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
                    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
                        range = tnum_range(1, 1);
+               if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
+                   env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
+                       range = tnum_range(0, 3);
                break;
        case BPF_PROG_TYPE_CGROUP_SKB:
                if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
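
With this verifier change, cgroup/bind{4,6} programs may return any
value in [0, 3]: bit #0 is the allow/deny verdict and bit #1 requests
the capability bypass. A program ending in "return 3" both allows the
bind and skips the CAP_NET_BIND_SERVICE check; anything above 3 is
still rejected at program load time.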
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6ba2930..aaa94be 100644
@@ -438,6 +438,7 @@ EXPORT_SYMBOL(inet_release);
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
        struct sock *sk = sock->sk;
+       u32 flags = BIND_WITH_LOCK;
        int err;
 
        /* If the socket has its own bind function then use it. (RAW) */
@@ -450,11 +451,12 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        /* BPF prog is run before any checks are done so that if the prog
         * changes context in a wrong way it will be caught.
         */
-       err = BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr);
+       err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+                                                BPF_CGROUP_INET4_BIND, &flags);
        if (err)
                return err;
 
-       return __inet_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
+       return __inet_bind(sk, uaddr, addr_len, flags);
 }
 EXPORT_SYMBOL(inet_bind);
 
@@ -499,7 +501,8 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 
        snum = ntohs(addr->sin_port);
        err = -EACCES;
-       if (snum && inet_port_requires_bind_service(net, snum) &&
+       if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
+           snum && inet_port_requires_bind_service(net, snum) &&
            !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
                goto out;
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b9c6548..f091fe9 100644
@@ -295,7 +295,8 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
                return -EINVAL;
 
        snum = ntohs(addr->sin6_port);
-       if (snum && inet_port_requires_bind_service(net, snum) &&
+       if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
+           snum && inet_port_requires_bind_service(net, snum) &&
            !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
                return -EACCES;
 
@@ -439,6 +440,7 @@ out_unlock:
 int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
        struct sock *sk = sock->sk;
+       u32 flags = BIND_WITH_LOCK;
        int err = 0;
 
        /* If the socket has its own bind function then use it. */
@@ -451,11 +453,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        /* BPF prog is run before any checks are done so that if the prog
         * changes context in a wrong way it will be caught.
         */
-       err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr);
+       err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+                                                BPF_CGROUP_INET6_BIND, &flags);
        if (err)
                return err;
 
-       return __inet6_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
+       return __inet6_bind(sk, uaddr, addr_len, flags);
 }
 EXPORT_SYMBOL(inet6_bind);
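
For completeness, a hedged sketch of attaching such a program from
user space with libbpf; the cgroup path is hypothetical and prog_fd is
assumed to hold the loaded cgroup/bind4 program from the example above:

#include <fcntl.h>
#include <bpf/bpf.h>

int attach_bind4(int prog_fd)
{
	/* Hypothetical cgroup v2 path. */
	int cg_fd = open("/sys/fs/cgroup/unified/test", O_RDONLY);

	if (cg_fd < 0)
		return -1;
	/* Attach to the INET4 bind hook; flags = 0. */
	return bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET4_BIND, 0);
}

With the program attached, an unprivileged bind() from a task in that
cgroup gets its port rewritten and, thanks to the new return code, no
longer fails with -EACCES.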