OSDN Git Service

bpf: Parameterize task iterators.
author Kui-Feng Lee <kuifeng@fb.com>
Mon, 26 Sep 2022 18:49:53 +0000 (11:49 -0700)
committer Andrii Nakryiko <andrii@kernel.org>
Wed, 28 Sep 2022 23:29:47 +0000 (16:29 -0700)
Allow creating an iterator that loops through resources of one
thread/process.

Previously, people could only create iterators that loop through all
resources (files, vmas, and tasks) in the system, even when they were
interested in only the resources of a specific task or process.  By
passing the additional parameters, people can now create an iterator
that goes through either all resources or only the resources of one task.

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/bpf/20220926184957.208194-2-kuifeng@fb.com
include/linux/bpf.h
include/uapi/linux/bpf.h
kernel/bpf/task_iter.c
tools/include/uapi/linux/bpf.h

index 5161fac..0f3eaf3 100644 (file)
@@ -1796,6 +1796,27 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
        extern int bpf_iter_ ## target(args);                   \
        int __init bpf_iter_ ## target(args) { return 0; }
 
+/*
+ * The task type of iterators.
+ *
+ * BPF task iterators can be parameterized to visit only some of the
+ * tasks in the system.
+ *
+ * BPF_TASK_ITER_ALL (default)
+ *     Iterate over resources of every task.
+ *
+ * BPF_TASK_ITER_TID
+ *     Iterate over resources of a task/tid.
+ *
+ * BPF_TASK_ITER_TGID
+ *     Iterate over resources of every task of a process / task group.
+ */
+enum bpf_iter_task_type {
+       BPF_TASK_ITER_ALL = 0,
+       BPF_TASK_ITER_TID,
+       BPF_TASK_ITER_TGID,
+};
+
 struct bpf_iter_aux_info {
        /* for map_elem iter */
        struct bpf_map *map;
@@ -1805,6 +1826,10 @@ struct bpf_iter_aux_info {
                struct cgroup *start; /* starting cgroup */
                enum bpf_cgroup_iter_order order;
        } cgroup;
+       struct {
+               enum bpf_iter_task_type type;
+               u32 pid;
+       } task;
 };
 
 typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
index d6bd107..455b21a 100644 (file)
@@ -110,6 +110,12 @@ union bpf_iter_link_info {
                __u32   cgroup_fd;
                __u64   cgroup_id;
        } cgroup;
+       /* Parameters of task iterators. */
+       struct {
+               __u32   tid;
+               __u32   pid;
+               __u32   pid_fd;
+       } task;
 };
 
 /* BPF syscall commands, see bpf(2) man-page for more details. */
index 8c92179..8b2f47e 100644 (file)
@@ -12,6 +12,9 @@
 
 struct bpf_iter_seq_task_common {
        struct pid_namespace *ns;
+       enum bpf_iter_task_type type;
+       u32 pid;
+       u32 pid_visiting;
 };
 
 struct bpf_iter_seq_task_info {
@@ -22,18 +25,115 @@ struct bpf_iter_seq_task_info {
        u32 tid;
 };
 
-static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
+static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common,
+                                                  u32 *tid,
+                                                  bool skip_if_dup_files)
+{
+       struct task_struct *task, *next_task;
+       struct pid *pid;
+       u32 saved_tid;
+
+       if (!*tid) {
+               /* The first time, the iterator calls this function. */
+               pid = find_pid_ns(common->pid, common->ns);
+               if (!pid)
+                       return NULL;
+
+               task = get_pid_task(pid, PIDTYPE_TGID);
+               if (!task)
+                       return NULL;
+
+               *tid = common->pid;
+               common->pid_visiting = common->pid;
+
+               return task;
+       }
+
+       /* If the control returns to user space and comes back to the
+        * kernel again, *tid and common->pid_visiting should be the
+        * same for task_seq_start() to pick up the correct task.
+        */
+       if (*tid == common->pid_visiting) {
+               pid = find_pid_ns(common->pid_visiting, common->ns);
+               task = get_pid_task(pid, PIDTYPE_PID);
+
+               return task;
+       }
+
+       pid = find_pid_ns(common->pid_visiting, common->ns);
+       if (!pid)
+               return NULL;
+
+       task = get_pid_task(pid, PIDTYPE_PID);
+       if (!task)
+               return NULL;
+
+retry:
+       if (!pid_alive(task)) {
+               put_task_struct(task);
+               return NULL;
+       }
+
+       next_task = next_thread(task);
+       put_task_struct(task);
+       if (!next_task)
+               return NULL;
+
+       saved_tid = *tid;
+       *tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns);
+       if (!*tid || *tid == common->pid) {
+               /* Run out of tasks of a process.  The tasks of a
+                * thread_group are linked as circular linked list.
+                */
+               *tid = saved_tid;
+               return NULL;
+       }
+
+       get_task_struct(next_task);
+       common->pid_visiting = *tid;
+
+       if (skip_if_dup_files && task->files == task->group_leader->files) {
+               task = next_task;
+               goto retry;
+       }
+
+       return next_task;
+}
+
+static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
                                             u32 *tid,
                                             bool skip_if_dup_files)
 {
        struct task_struct *task = NULL;
        struct pid *pid;
 
+       if (common->type == BPF_TASK_ITER_TID) {
+               if (*tid && *tid != common->pid)
+                       return NULL;
+               rcu_read_lock();
+               pid = find_pid_ns(common->pid, common->ns);
+               if (pid) {
+                       task = get_pid_task(pid, PIDTYPE_TGID);
+                       *tid = common->pid;
+               }
+               rcu_read_unlock();
+
+               return task;
+       }
+
+       if (common->type == BPF_TASK_ITER_TGID) {
+               rcu_read_lock();
+               task = task_group_seq_get_next(common, tid, skip_if_dup_files);
+               rcu_read_unlock();
+
+               return task;
+       }
+
        rcu_read_lock();
 retry:
-       pid = find_ge_pid(*tid, ns);
+       pid = find_ge_pid(*tid, common->ns);
        if (pid) {
-               *tid = pid_nr_ns(pid, ns);
+               *tid = pid_nr_ns(pid, common->ns);
                task = get_pid_task(pid, PIDTYPE_PID);
                if (!task) {
                        ++*tid;
@@ -56,7 +156,7 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos)
        struct bpf_iter_seq_task_info *info = seq->private;
        struct task_struct *task;
 
-       task = task_seq_get_next(info->common.ns, &info->tid, false);
+       task = task_seq_get_next(&info->common, &info->tid, false);
        if (!task)
                return NULL;
 
@@ -73,7 +173,7 @@ static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
        ++*pos;
        ++info->tid;
        put_task_struct((struct task_struct *)v);
-       task = task_seq_get_next(info->common.ns, &info->tid, false);
+       task = task_seq_get_next(&info->common, &info->tid, false);
        if (!task)
                return NULL;
 
@@ -117,6 +217,41 @@ static void task_seq_stop(struct seq_file *seq, void *v)
                put_task_struct((struct task_struct *)v);
 }
 
+static int bpf_iter_attach_task(struct bpf_prog *prog,
+                               union bpf_iter_link_info *linfo,
+                               struct bpf_iter_aux_info *aux)
+{
+       unsigned int flags;
+       struct pid *pid;
+       pid_t tgid;
+
+       if ((!!linfo->task.tid + !!linfo->task.pid + !!linfo->task.pid_fd) > 1)
+               return -EINVAL;
+
+       aux->task.type = BPF_TASK_ITER_ALL;
+       if (linfo->task.tid != 0) {
+               aux->task.type = BPF_TASK_ITER_TID;
+               aux->task.pid = linfo->task.tid;
+       }
+       if (linfo->task.pid != 0) {
+               aux->task.type = BPF_TASK_ITER_TGID;
+               aux->task.pid = linfo->task.pid;
+       }
+       if (linfo->task.pid_fd != 0) {
+               aux->task.type = BPF_TASK_ITER_TGID;
+
+               pid = pidfd_get_pid(linfo->task.pid_fd, &flags);
+               if (IS_ERR(pid))
+                       return PTR_ERR(pid);
+
+               tgid = pid_nr_ns(pid, task_active_pid_ns(current));
+               aux->task.pid = tgid;
+               put_pid(pid);
+       }
+
+       return 0;
+}
+
 static const struct seq_operations task_seq_ops = {
        .start  = task_seq_start,
        .next   = task_seq_next,
@@ -137,8 +272,7 @@ struct bpf_iter_seq_task_file_info {
 static struct file *
 task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
 {
-       struct pid_namespace *ns = info->common.ns;
-       u32 curr_tid = info->tid;
+       u32 saved_tid = info->tid;
        struct task_struct *curr_task;
        unsigned int curr_fd = info->fd;
 
@@ -151,21 +285,18 @@ again:
                curr_task = info->task;
                curr_fd = info->fd;
        } else {
-                curr_task = task_seq_get_next(ns, &curr_tid, true);
+               curr_task = task_seq_get_next(&info->common, &info->tid, true);
                 if (!curr_task) {
                         info->task = NULL;
-                        info->tid = curr_tid;
                         return NULL;
                 }
 
-                /* set info->task and info->tid */
+               /* set info->task */
                info->task = curr_task;
-               if (curr_tid == info->tid) {
+               if (saved_tid == info->tid)
                        curr_fd = info->fd;
-               } else {
-                       info->tid = curr_tid;
+               else
                        curr_fd = 0;
-               }
        }
 
        rcu_read_lock();
@@ -186,9 +317,15 @@ again:
        /* the current task is done, go to the next task */
        rcu_read_unlock();
        put_task_struct(curr_task);
+
+       if (info->common.type == BPF_TASK_ITER_TID) {
+               info->task = NULL;
+               return NULL;
+       }
+
        info->task = NULL;
        info->fd = 0;
-       curr_tid = ++(info->tid);
+       saved_tid = ++(info->tid);
        goto again;
 }
 
@@ -269,6 +406,9 @@ static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
        struct bpf_iter_seq_task_common *common = priv_data;
 
        common->ns = get_pid_ns(task_active_pid_ns(current));
+       common->type = aux->task.type;
+       common->pid = aux->task.pid;
+
        return 0;
 }
 
@@ -307,11 +447,10 @@ enum bpf_task_vma_iter_find_op {
 static struct vm_area_struct *
 task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
 {
-       struct pid_namespace *ns = info->common.ns;
        enum bpf_task_vma_iter_find_op op;
        struct vm_area_struct *curr_vma;
        struct task_struct *curr_task;
-       u32 curr_tid = info->tid;
+       u32 saved_tid = info->tid;
 
        /* If this function returns a non-NULL vma, it holds a reference to
         * the task_struct, and holds read lock on vma->mm->mmap_lock.
@@ -371,14 +510,13 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
                }
        } else {
 again:
-               curr_task = task_seq_get_next(ns, &curr_tid, true);
+               curr_task = task_seq_get_next(&info->common, &info->tid, true);
                if (!curr_task) {
-                       info->tid = curr_tid + 1;
+                       info->tid++;
                        goto finish;
                }
 
-               if (curr_tid != info->tid) {
-                       info->tid = curr_tid;
+               if (saved_tid != info->tid) {
                        /* new task, process the first vma */
                        op = task_vma_iter_first_vma;
                } else {
@@ -430,9 +568,12 @@ again:
        return curr_vma;
 
 next_task:
+       if (info->common.type == BPF_TASK_ITER_TID)
+               goto finish;
+
        put_task_struct(curr_task);
        info->task = NULL;
-       curr_tid++;
+       info->tid++;
        goto again;
 
 finish:
@@ -533,6 +674,7 @@ static const struct bpf_iter_seq_info task_seq_info = {
 
 static struct bpf_iter_reg task_reg_info = {
        .target                 = "task",
+       .attach_target          = bpf_iter_attach_task,
        .feature                = BPF_ITER_RESCHED,
        .ctx_arg_info_size      = 1,
        .ctx_arg_info           = {
@@ -551,6 +693,7 @@ static const struct bpf_iter_seq_info task_file_seq_info = {
 
 static struct bpf_iter_reg task_file_reg_info = {
        .target                 = "task_file",
+       .attach_target          = bpf_iter_attach_task,
        .feature                = BPF_ITER_RESCHED,
        .ctx_arg_info_size      = 2,
        .ctx_arg_info           = {
@@ -571,6 +714,7 @@ static const struct bpf_iter_seq_info task_vma_seq_info = {
 
 static struct bpf_iter_reg task_vma_reg_info = {
        .target                 = "task_vma",
+       .attach_target          = bpf_iter_attach_task,
        .feature                = BPF_ITER_RESCHED,
        .ctx_arg_info_size      = 2,
        .ctx_arg_info           = {
index d6bd107..455b21a 100644 (file)
@@ -110,6 +110,12 @@ union bpf_iter_link_info {
                __u32   cgroup_fd;
                __u64   cgroup_id;
        } cgroup;
+       /* Parameters of task iterators. */
+       struct {
+               __u32   tid;
+               __u32   pid;
+               __u32   pid_fd;
+       } task;
 };
 
 /* BPF syscall commands, see bpf(2) man-page for more details. */