OSDN Git Service

bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper
author Yonghong Song <yhs@fb.com>
Tue, 23 Mar 2021 05:51:46 +0000 (22:51 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Fri, 26 Mar 2021 01:31:36 +0000 (18:31 -0700)
Jiri Olsa reported a bug ([1]) in the kernel where the cgroup local
storage pointer may be NULL in the bpf_get_local_storage() helper.
There are two issues uncovered by this bug:
  (1). kprobe or tracepoint prog incorrectly sets cgroup local storage
       before prog run,
  (2). due to change from preempt_disable to migrate_disable,
       preemption is possible and percpu storage might be overwritten
       by other tasks.

Issue (1) is fixed in [2]. This patch addresses issue (2).
The following shows how things can go wrong:
  task 1:   bpf_cgroup_storage_set() for percpu local storage
         preemption happens
  task 2:   bpf_cgroup_storage_set() for percpu local storage
         preemption happens
  task 1:   run bpf program

Task 1 will effectively use the percpu local storage set by task 2,
which will be either NULL or incorrect.

Instead of just one common local storage per cpu, this patch fixes
the issue by permitting 8 local storages per cpu, each identified
by a task_struct pointer. This way, we allow at most 8 nested
preemptions between bpf_cgroup_storage_set() and
bpf_cgroup_storage_unset(). The percpu local storage slot is
released (by calling bpf_cgroup_storage_unset()) by the same task
after the bpf program has finished running.
bpf_test_run() is also fixed to use the new bpf_cgroup_storage_set()
interface.

The patch is tested on top of [2] with reproducer in [1].
Without this patch, the kernel emits an error within 2-3 minutes.
With this patch, there is still no error after one hour.

 [1] https://lore.kernel.org/bpf/CAKH8qBuXCfUz=w8L+Fj74OaUpbosO29niYwTki7e3Ag044_aww@mail.gmail.com/T
 [2] https://lore.kernel.org/bpf/20210309185028.3763817-1-yhs@fb.com

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Roman Gushchin <guro@fb.com>
Link: https://lore.kernel.org/bpf/20210323055146.3334476-1-yhs@fb.com
include/linux/bpf-cgroup.h
include/linux/bpf.h
kernel/bpf/helpers.c
kernel/bpf/local_storage.c
net/bpf/test_run.c

index c42e02b..6a29fe1 100644 (file)
@@ -20,14 +20,25 @@ struct bpf_sock_ops_kern;
 struct bpf_cgroup_storage;
 struct ctl_table;
 struct ctl_table_header;
+struct task_struct;
 
 #ifdef CONFIG_CGROUP_BPF
 
 extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
 #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
 
-DECLARE_PER_CPU(struct bpf_cgroup_storage*,
-               bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+#define BPF_CGROUP_STORAGE_NEST_MAX    8
+
+struct bpf_cgroup_storage_info {
+       struct task_struct *task;
+       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+};
+
+/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
+ * to use bpf cgroup storage simultaneously.
+ */
+DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
+               bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
 
 #define for_each_cgroup_storage_type(stype) \
        for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
@@ -161,13 +172,42 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
        return BPF_CGROUP_STORAGE_SHARED;
 }
 
-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
-                                         *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
+static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
+                                        *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
 {
        enum bpf_cgroup_storage_type stype;
+       int i, err = 0;
+
+       preempt_disable();
+       for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+               if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
+                       continue;
+
+               this_cpu_write(bpf_cgroup_storage_info[i].task, current);
+               for_each_cgroup_storage_type(stype)
+                       this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
+                                      storage[stype]);
+               goto out;
+       }
+       err = -EBUSY;
+       WARN_ON_ONCE(1);
+
+out:
+       preempt_enable();
+       return err;
+}
+
+static inline void bpf_cgroup_storage_unset(void)
+{
+       int i;
+
+       for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+               if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
+                       continue;
 
-       for_each_cgroup_storage_type(stype)
-               this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
+               this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
+               return;
+       }
 }
 
 struct bpf_cgroup_storage *
@@ -448,8 +488,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
        return -EINVAL;
 }
 
-static inline void bpf_cgroup_storage_set(
-       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
+static inline int bpf_cgroup_storage_set(
+       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
+static inline void bpf_cgroup_storage_unset(void) {}
 static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
                                            struct bpf_map *map) { return 0; }
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
index 24678d6..5a0801b 100644 (file)
@@ -1106,6 +1106,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 /* BPF program asks to set CN on the packet. */
 #define BPF_RET_SET_CN                                         (1 << 0)
 
+/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
+ * if bpf_cgroup_storage_set() failed, the rest of programs
+ * will not execute. This should be a really rare scenario
+ * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
+ * preemptions all between bpf_cgroup_storage_set() and
+ * bpf_cgroup_storage_unset() on the same cpu.
+ */
 #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)          \
        ({                                                              \
                struct bpf_prog_array_item *_item;                      \
@@ -1118,10 +1125,12 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                _array = rcu_dereference(array);                        \
                _item = &_array->items[0];                              \
                while ((_prog = READ_ONCE(_item->prog))) {              \
-                       bpf_cgroup_storage_set(_item->cgroup_storage);  \
+                       if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))    \
+                               break;                                  \
                        func_ret = func(_prog, ctx);                    \
                        _ret &= (func_ret & 1);                         \
                        *(ret_flags) |= (func_ret >> 1);                        \
+                       bpf_cgroup_storage_unset();                     \
                        _item++;                                        \
                }                                                       \
                rcu_read_unlock();                                      \
@@ -1142,9 +1151,14 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                        goto _out;                      \
                _item = &_array->items[0];              \
                while ((_prog = READ_ONCE(_item->prog))) {              \
-                       if (set_cg_storage)             \
-                               bpf_cgroup_storage_set(_item->cgroup_storage);  \
-                       _ret &= func(_prog, ctx);       \
+                       if (!set_cg_storage) {                  \
+                               _ret &= func(_prog, ctx);       \
+                       } else {                                \
+                               if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))    \
+                                       break;                  \
+                               _ret &= func(_prog, ctx);       \
+                               bpf_cgroup_storage_unset();     \
+                       }                               \
                        _item++;                        \
                }                                       \
 _out:                                                  \
index 0748002..f306611 100644 (file)
@@ -382,8 +382,8 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
 };
 
 #ifdef CONFIG_CGROUP_BPF
-DECLARE_PER_CPU(struct bpf_cgroup_storage*,
-               bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
+               bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
 
 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 {
@@ -392,10 +392,17 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
         * verifier checks that its value is correct.
         */
        enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
-       struct bpf_cgroup_storage *storage;
+       struct bpf_cgroup_storage *storage = NULL;
        void *ptr;
+       int i;
 
-       storage = this_cpu_read(bpf_cgroup_storage[stype]);
+       for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+               if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
+                       continue;
+
+               storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
+               break;
+       }
 
        if (stype == BPF_CGROUP_STORAGE_SHARED)
                ptr = &READ_ONCE(storage->buf)->data[0];
index 2d4f9ac..bd11db9 100644 (file)
@@ -9,10 +9,11 @@
 #include <linux/slab.h>
 #include <uapi/linux/btf.h>
 
-DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
-
 #ifdef CONFIG_CGROUP_BPF
 
+DEFINE_PER_CPU(struct bpf_cgroup_storage_info,
+              bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
+
 #include "../cgroup/cgroup-internal.h"
 
 #define LOCAL_STORAGE_CREATE_FLAG_MASK                                 \
index 0abdd67..4aabf71 100644 (file)
@@ -106,12 +106,16 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 
        bpf_test_timer_enter(&t);
        do {
-               bpf_cgroup_storage_set(storage);
+               ret = bpf_cgroup_storage_set(storage);
+               if (ret)
+                       break;
 
                if (xdp)
                        *retval = bpf_prog_run_xdp(prog, ctx);
                else
                        *retval = BPF_PROG_RUN(prog, ctx);
+
+               bpf_cgroup_storage_unset();
        } while (bpf_test_timer_continue(&t, repeat, &ret, time));
        bpf_test_timer_leave(&t);