OSDN Git Service

bpf: Enable non-atomic allocations in local storage
author: Joanne Koong <joannelkoong@gmail.com>
Fri, 18 Mar 2022 04:55:52 +0000 (21:55 -0700)
committer: Alexei Starovoitov <ast@kernel.org>
Mon, 21 Mar 2022 01:55:05 +0000 (18:55 -0700)
Currently, local storage memory can only be allocated atomically
(GFP_ATOMIC). This restriction is too strict for sleepable bpf
programs.

In this patch, the verifier detects whether the program is sleepable,
and passes the corresponding GFP_KERNEL or GFP_ATOMIC flag as a
5th argument to bpf_task/sk/inode_storage_get. This flag will propagate
down to the local storage functions that allocate memory.

Please note that bpf_task/sk/inode_storage_update_elem functions are
invoked by userspace applications through syscalls. Preemption is
disabled before bpf_task/sk/inode_storage_update_elem is called, which
means they will always have to allocate memory atomically.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: KP Singh <kpsingh@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220318045553.3091807-2-joannekoong@fb.com
include/linux/bpf_local_storage.h
kernel/bpf/bpf_inode_storage.c
kernel/bpf/bpf_local_storage.c
kernel/bpf/bpf_task_storage.c
kernel/bpf/verifier.c
net/core/bpf_sk_storage.c

index 37b3906..493e632 100644 (file)
@@ -154,16 +154,17 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
-               bool charge_mem);
+               bool charge_mem, gfp_t gfp_flags);
 
 int
 bpf_local_storage_alloc(void *owner,
                        struct bpf_local_storage_map *smap,
-                       struct bpf_local_storage_elem *first_selem);
+                       struct bpf_local_storage_elem *first_selem,
+                       gfp_t gfp_flags);
 
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-                        void *value, u64 map_flags);
+                        void *value, u64 map_flags, gfp_t gfp_flags);
 
 void bpf_local_storage_free_rcu(struct rcu_head *rcu);
 
index e29d9e3..96be8d5 100644 (file)
@@ -136,7 +136,7 @@ static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
 
        sdata = bpf_local_storage_update(f->f_inode,
                                         (struct bpf_local_storage_map *)map,
-                                        value, map_flags);
+                                        value, map_flags, GFP_ATOMIC);
        fput(f);
        return PTR_ERR_OR_ZERO(sdata);
 }
@@ -169,8 +169,9 @@ static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
        return err;
 }
 
-BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
-          void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
+          void *, value, u64, flags, gfp_t, gfp_flags)
 {
        struct bpf_local_storage_data *sdata;
 
@@ -196,7 +197,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
        if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
                sdata = bpf_local_storage_update(
                        inode, (struct bpf_local_storage_map *)map, value,
-                       BPF_NOEXIST);
+                       BPF_NOEXIST, gfp_flags);
                return IS_ERR(sdata) ? (unsigned long)NULL :
                                             (unsigned long)sdata->data;
        }
index 092a1ac..01aa2b5 100644 (file)
@@ -63,7 +63,7 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
-               void *value, bool charge_mem)
+               void *value, bool charge_mem, gfp_t gfp_flags)
 {
        struct bpf_local_storage_elem *selem;
 
@@ -71,7 +71,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
                return NULL;
 
        selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
-                               GFP_ATOMIC | __GFP_NOWARN);
+                               gfp_flags | __GFP_NOWARN);
        if (selem) {
                if (value)
                        memcpy(SDATA(selem)->data, value, smap->map.value_size);
@@ -282,7 +282,8 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata,
 
 int bpf_local_storage_alloc(void *owner,
                            struct bpf_local_storage_map *smap,
-                           struct bpf_local_storage_elem *first_selem)
+                           struct bpf_local_storage_elem *first_selem,
+                           gfp_t gfp_flags)
 {
        struct bpf_local_storage *prev_storage, *storage;
        struct bpf_local_storage **owner_storage_ptr;
@@ -293,7 +294,7 @@ int bpf_local_storage_alloc(void *owner,
                return err;
 
        storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
-                                 GFP_ATOMIC | __GFP_NOWARN);
+                                 gfp_flags | __GFP_NOWARN);
        if (!storage) {
                err = -ENOMEM;
                goto uncharge;
@@ -350,10 +351,10 @@ uncharge:
  */
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-                        void *value, u64 map_flags)
+                        void *value, u64 map_flags, gfp_t gfp_flags)
 {
        struct bpf_local_storage_data *old_sdata = NULL;
-       struct bpf_local_storage_elem *selem;
+       struct bpf_local_storage_elem *selem = NULL;
        struct bpf_local_storage *local_storage;
        unsigned long flags;
        int err;
@@ -365,6 +366,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                     !map_value_has_spin_lock(&smap->map)))
                return ERR_PTR(-EINVAL);
 
+       if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
+               return ERR_PTR(-EINVAL);
+
        local_storage = rcu_dereference_check(*owner_storage(smap, owner),
                                              bpf_rcu_lock_held());
        if (!local_storage || hlist_empty(&local_storage->list)) {
@@ -373,11 +377,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                if (err)
                        return ERR_PTR(err);
 
-               selem = bpf_selem_alloc(smap, owner, value, true);
+               selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
                if (!selem)
                        return ERR_PTR(-ENOMEM);
 
-               err = bpf_local_storage_alloc(owner, smap, selem);
+               err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
                if (err) {
                        kfree(selem);
                        mem_uncharge(smap, owner, smap->elem_size);
@@ -404,6 +408,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                }
        }
 
+       if (gfp_flags == GFP_KERNEL) {
+               selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
+               if (!selem)
+                       return ERR_PTR(-ENOMEM);
+       }
+
        raw_spin_lock_irqsave(&local_storage->lock, flags);
 
        /* Recheck local_storage->list under local_storage->lock */
@@ -429,19 +439,21 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                goto unlock;
        }
 
-       /* local_storage->lock is held.  Hence, we are sure
-        * we can unlink and uncharge the old_sdata successfully
-        * later.  Hence, instead of charging the new selem now
-        * and then uncharge the old selem later (which may cause
-        * a potential but unnecessary charge failure),  avoid taking
-        * a charge at all here (the "!old_sdata" check) and the
-        * old_sdata will not be uncharged later during
-        * bpf_selem_unlink_storage_nolock().
-        */
-       selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
-       if (!selem) {
-               err = -ENOMEM;
-               goto unlock_err;
+       if (gfp_flags != GFP_KERNEL) {
+               /* local_storage->lock is held.  Hence, we are sure
+                * we can unlink and uncharge the old_sdata successfully
+                * later.  Hence, instead of charging the new selem now
+                * and then uncharge the old selem later (which may cause
+                * a potential but unnecessary charge failure),  avoid taking
+                * a charge at all here (the "!old_sdata" check) and the
+                * old_sdata will not be uncharged later during
+                * bpf_selem_unlink_storage_nolock().
+                */
+               selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
+               if (!selem) {
+                       err = -ENOMEM;
+                       goto unlock_err;
+               }
        }
 
        /* First, link the new selem to the map */
@@ -463,6 +475,10 @@ unlock:
 
 unlock_err:
        raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+       if (selem) {
+               mem_uncharge(smap, owner, smap->elem_size);
+               kfree(selem);
+       }
        return ERR_PTR(err);
 }
 
index 5da7bed..6638a0e 100644 (file)
@@ -174,7 +174,8 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
 
        bpf_task_storage_lock();
        sdata = bpf_local_storage_update(
-               task, (struct bpf_local_storage_map *)map, value, map_flags);
+               task, (struct bpf_local_storage_map *)map, value, map_flags,
+               GFP_ATOMIC);
        bpf_task_storage_unlock();
 
        err = PTR_ERR_OR_ZERO(sdata);
@@ -226,8 +227,9 @@ out:
        return err;
 }
 
-BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
-          task, void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+          task, void *, value, u64, flags, gfp_t, gfp_flags)
 {
        struct bpf_local_storage_data *sdata;
 
@@ -250,7 +252,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
            (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
                sdata = bpf_local_storage_update(
                        task, (struct bpf_local_storage_map *)map, value,
-                       BPF_NOEXIST);
+                       BPF_NOEXIST, gfp_flags);
 
 unlock:
        bpf_task_storage_unlock();
index 0287176..6347dcd 100644 (file)
@@ -13492,6 +13492,26 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
                        goto patch_call_imm;
                }
 
+               if (insn->imm == BPF_FUNC_task_storage_get ||
+                   insn->imm == BPF_FUNC_sk_storage_get ||
+                   insn->imm == BPF_FUNC_inode_storage_get) {
+                       if (env->prog->aux->sleepable)
+                               insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__s32)GFP_KERNEL);
+                       else
+                               insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__s32)GFP_ATOMIC);
+                       insn_buf[1] = *insn;
+                       cnt = 2;
+
+                       new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+                       if (!new_prog)
+                               return -ENOMEM;
+
+                       delta += cnt - 1;
+                       env->prog = prog = new_prog;
+                       insn = new_prog->insnsi + i + delta;
+                       goto patch_call_imm;
+               }
+
                /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
                 * and other inlining handlers are currently limited to 64 bit
                 * only.
index d9c37fd..7aff120 100644 (file)
@@ -141,7 +141,7 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
        if (sock) {
                sdata = bpf_local_storage_update(
                        sock->sk, (struct bpf_local_storage_map *)map, value,
-                       map_flags);
+                       map_flags, GFP_ATOMIC);
                sockfd_put(sock);
                return PTR_ERR_OR_ZERO(sdata);
        }
@@ -172,7 +172,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
 {
        struct bpf_local_storage_elem *copy_selem;
 
-       copy_selem = bpf_selem_alloc(smap, newsk, NULL, true);
+       copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
        if (!copy_selem)
                return NULL;
 
@@ -230,7 +230,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
                        bpf_selem_link_map(smap, copy_selem);
                        bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
                } else {
-                       ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
+                       ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
                        if (ret) {
                                kfree(copy_selem);
                                atomic_sub(smap->elem_size,
@@ -255,8 +255,9 @@ out:
        return ret;
 }
 
-BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
-          void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
+          void *, value, u64, flags, gfp_t, gfp_flags)
 {
        struct bpf_local_storage_data *sdata;
 
@@ -277,7 +278,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
            refcount_inc_not_zero(&sk->sk_refcnt)) {
                sdata = bpf_local_storage_update(
                        sk, (struct bpf_local_storage_map *)map, value,
-                       BPF_NOEXIST);
+                       BPF_NOEXIST, gfp_flags);
                /* sk must be a fullsock (guaranteed by verifier),
                 * so sock_gen_put() is unnecessary.
                 */
@@ -417,14 +418,16 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
        return false;
 }
 
-BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
-          void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
+          void *, value, u64, flags, gfp_t, gfp_flags)
 {
        WARN_ON_ONCE(!bpf_rcu_lock_held());
        if (in_hardirq() || in_nmi())
                return (unsigned long)NULL;
 
-       return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
+       return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
+                                                    gfp_flags);
 }
 
 BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,