OSDN Git Service

bpf: add writable context for raw tracepoints
author Matt Mullins <mmullins@fb.com>
Fri, 26 Apr 2019 18:49:47 +0000 (11:49 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Sat, 27 Apr 2019 02:04:19 +0000 (19:04 -0700)
This is an opt-in interface that allows a tracepoint to provide a safe
buffer that can be written from a BPF_PROG_TYPE_RAW_TRACEPOINT program.
The size of the buffer must be a compile-time constant, and is checked
before allowing a BPF program to attach to a tracepoint that uses this
feature.

The pointer to this buffer will be the first argument of tracepoints
that opt in. The pointer is valid and can be read with bpf_probe_read()
by both BPF_PROG_TYPE_RAW_TRACEPOINT and
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE programs that attach to such a
tracepoint, but the buffer to which it points may only be written by the
latter.

Signed-off-by: Matt Mullins <mmullins@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
include/linux/bpf.h
include/linux/bpf_types.h
include/linux/tracepoint-defs.h
include/trace/bpf_probe.h
include/uapi/linux/bpf.h
kernel/bpf/syscall.c
kernel/bpf/verifier.c
kernel/trace/bpf_trace.c

index f15432d..cd6341e 100644 (file)
@@ -272,6 +272,7 @@ enum bpf_reg_type {
        PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
        PTR_TO_TCP_SOCK,         /* reg points to struct tcp_sock */
        PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
+       PTR_TO_TP_BUFFER,        /* reg points to a writable raw tp's buffer */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -361,6 +362,7 @@ struct bpf_prog_aux {
        u32 used_map_cnt;
        u32 max_ctx_offset;
        u32 max_pkt_offset;
+       u32 max_tp_access;
        u32 stack_depth;
        u32 id;
        u32 func_cnt; /* used by non-func prog as the number of func progs */
index d26991a..a10d37b 100644 (file)
@@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
 BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
 BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
 BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
+BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
 #endif
 #ifdef CONFIG_CGROUP_BPF
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
index 49ba9cd..b29950a 100644 (file)
@@ -45,6 +45,7 @@ struct bpf_raw_event_map {
        struct tracepoint       *tp;
        void                    *bpf_func;
        u32                     num_args;
+       u32                     writable_size;
 } __aligned(32);
 
 #endif
index 505dae0..d6e556c 100644 (file)
@@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto)                                       \
  * to make sure that if the tracepoint handling changes, the
  * bpf probe will fail to compile unless it too is updated.
  */
-#undef DEFINE_EVENT
-#define DEFINE_EVENT(template, call, proto, args)                      \
+#define __DEFINE_EVENT(template, call, proto, args, size)              \
 static inline void bpf_test_probe_##call(void)                         \
 {                                                                      \
        check_trace_callback_type_##call(__bpf_trace_##template);       \
@@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = {                                               \
        .tp             = &__tracepoint_##call,                         \
        .bpf_func       = (void *)__bpf_trace_##template,               \
        .num_args       = COUNT_ARGS(args),                             \
+       .writable_size  = size,                                         \
 };
 
+/* Expands to the first of its arguments and discards the rest. */
+#define FIRST(x, ...) x
+
+/*
+ * DEFINE_EVENT_WRITABLE - like DEFINE_EVENT, but forwards @size to
+ * __DEFINE_EVENT, which stores it in the event map's .writable_size.
+ *
+ * The generated bpf_test_buffer_##call() helper exists for its compile-time
+ * check: BUILD_BUG_ON_ZERO() is handed (size != sizeof(*first-arg)), where
+ * first-arg is the first entry of @args, so a mismatching @size is meant to
+ * break the build.
+ *
+ * NOTE(review): FIRST(proto); presumably expands to a declaration of that
+ * first argument so the sizeof expression has an object to name -- confirm
+ * against a real TP_PROTO user.
+ */
+#undef DEFINE_EVENT_WRITABLE
+#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size)       \
+static inline void bpf_test_buffer_##call(void)                                \
+{                                                                      \
+       /* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \
+        * BUILD_BUG_ON_ZERO() uses a different mechanism that is not   \
+        * dead-code-eliminated.                                        \
+        */                                                             \
+       FIRST(proto);                                                   \
+       (void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args)));          \
+}                                                                      \
+__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size)
+
+/* Ordinary events pass 0 as the size, i.e. .writable_size ends up 0. */
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)                      \
+       __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0)
 
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
        DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef DEFINE_EVENT_WRITABLE
+#undef __DEFINE_EVENT
+#undef FIRST
+
 #endif /* CONFIG_BPF_EVENTS */
index eaf2d32..f7fa7a3 100644 (file)
@@ -168,6 +168,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_SK_REUSEPORT,
        BPF_PROG_TYPE_FLOW_DISSECTOR,
        BPF_PROG_TYPE_CGROUP_SYSCTL,
+       BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
 };
 
 enum bpf_attach_type {
index b0de495..ae141e7 100644 (file)
@@ -1789,12 +1789,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
        }
        raw_tp->btp = btp;
 
-       prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
-                                BPF_PROG_TYPE_RAW_TRACEPOINT);
+       prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
        if (IS_ERR(prog)) {
                err = PTR_ERR(prog);
                goto out_free_tp;
        }
+       if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
+           prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
+               err = -EINVAL;
+               goto out_put_prog;
+       }
 
        err = bpf_probe_register(raw_tp->btp, prog);
        if (err)
index 423f242..2ef442c 100644 (file)
@@ -405,6 +405,7 @@ static const char * const reg_type_str[] = {
        [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
        [PTR_TO_TCP_SOCK]       = "tcp_sock",
        [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
+       [PTR_TO_TP_BUFFER]      = "tp_buffer",
 };
 
 static char slot_type_char[] = {
@@ -1993,6 +1994,32 @@ static int check_ctx_reg(struct bpf_verifier_env *env,
        return 0;
 }
 
+/* Validate a memory access made through a PTR_TO_TP_BUFFER register.
+ *
+ * @env:   verifier environment; supplies the log and the program's aux data
+ * @reg:   state of the pointer register being dereferenced
+ * @regno: number of that register, used only in log messages
+ * @off:   byte offset of the access
+ * @size:  width of the access in bytes
+ *
+ * A negative offset is rejected, as is any register whose variable offset
+ * is not the constant zero. On success the program's max_tp_access is
+ * raised to off + size if the access reaches further than any seen so far;
+ * __bpf_probe_register() later compares that value with the tracepoint's
+ * writable_size.
+ *
+ * Returns 0 if the access is acceptable, -EACCES otherwise.
+ */
+static int check_tp_buffer_access(struct bpf_verifier_env *env,
+                                 const struct bpf_reg_state *reg,
+                                 int regno, int off, int size)
+{
+       if (off < 0) {
+               /* Terminate with '\n' like the other verifier messages so
+                * the next log entry starts on its own line.
+                */
+               verbose(env,
+                       "R%d invalid tracepoint buffer access: off=%d, size=%d\n",
+                       regno, off, size);
+               return -EACCES;
+       }
+       if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+               char tn_buf[48];
+
+               tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+               verbose(env,
+                       "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
+                       regno, off, tn_buf);
+               return -EACCES;
+       }
+       /* Track the furthest byte touched so far. */
+       if (off + size > env->prog->aux->max_tp_access)
+               env->prog->aux->max_tp_access = off + size;
+
+       return 0;
+}
+
 /* truncate register to smaller size (in bytes)
  * must be called with size < BPF_REG_SIZE
  */
@@ -2137,6 +2164,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                err = check_sock_access(env, insn_idx, regno, off, size, t);
                if (!err && value_regno >= 0)
                        mark_reg_unknown(env, regs, value_regno);
+       } else if (reg->type == PTR_TO_TP_BUFFER) {
+               err = check_tp_buffer_access(env, reg, regno, off, size);
+               if (!err && t == BPF_READ && value_regno >= 0)
+                       mark_reg_unknown(env, regs, value_regno);
        } else {
                verbose(env, "R%d invalid mem access '%s'\n", regno,
                        reg_type_str[reg->type]);
index 91800be..8607aba 100644 (file)
@@ -915,6 +915,27 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
 const struct bpf_prog_ops raw_tracepoint_prog_ops = {
 };
 
+/* Context access check used by raw_tracepoint_writable_verifier_ops.
+ *
+ * Context offset 0 may only be fetched with a u64-sized read, and the
+ * loaded value is typed as PTR_TO_TP_BUFFER. Whatever the offset, the
+ * final verdict is delegated to raw_tp_prog_is_valid_access().
+ */
+static bool raw_tp_writable_prog_is_valid_access(int off, int size,
+                                                enum bpf_access_type type,
+                                                const struct bpf_prog *prog,
+                                                struct bpf_insn_access_aux *info)
+{
+       bool first_slot = off == 0;
+
+       /* Anything other than a full 64-bit load of slot 0 is refused. */
+       if (first_slot && !(size == sizeof(u64) && type == BPF_READ))
+               return false;
+       if (first_slot)
+               info->reg_type = PTR_TO_TP_BUFFER;
+
+       return raw_tp_prog_is_valid_access(off, size, type, prog, info);
+}
+
+/* Verifier callbacks for writable raw tracepoint programs: helper lookup
+ * goes through raw_tp_prog_func_proto, context accesses through
+ * raw_tp_writable_prog_is_valid_access.
+ */
+const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
+       .get_func_proto  = raw_tp_prog_func_proto,
+       .is_valid_access = raw_tp_writable_prog_is_valid_access,
+};
+
+/* Intentionally empty: no program-level operations are set here. */
+const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
+};
+
 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
                                    const struct bpf_prog *prog,
                                    struct bpf_insn_access_aux *info)
@@ -1204,6 +1225,9 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
        if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
                return -EINVAL;
 
+       if (prog->aux->max_tp_access > btp->writable_size)
+               return -EINVAL;
+
        return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
 }