OSDN Git Service

net: add skeleton of bpfilter kernel module
authorAlexei Starovoitov <ast@kernel.org>
Tue, 22 May 2018 02:22:30 +0000 (19:22 -0700)
committerDavid S. Miller <davem@davemloft.net>
Wed, 23 May 2018 17:23:40 +0000 (13:23 -0400)
bpfilter.ko consists of bpfilter_kern.c (normal kernel module code)
and user mode helper code that is embedded into bpfilter.ko

The steps to build bpfilter.ko are the following:
- main.c is compiled by HOSTCC into the bpfilter_umh elf executable file
- with quite a bit of objcopy and Makefile magic the bpfilter_umh elf file
  is converted into bpfilter_umh.o object file
  with _binary_net_bpfilter_bpfilter_umh_start and _end symbols
  Example:
  $ nm ./bld_x64/net/bpfilter/bpfilter_umh.o
  0000000000004cf8 T _binary_net_bpfilter_bpfilter_umh_end
  0000000000004cf8 A _binary_net_bpfilter_bpfilter_umh_size
  0000000000000000 T _binary_net_bpfilter_bpfilter_umh_start
- bpfilter_umh.o and bpfilter_kern.o are linked together into bpfilter.ko

bpfilter_kern.c is a normal kernel module code that calls
the fork_usermode_blob() helper to execute part of its own data
as a user mode process.

Notice that _binary_net_bpfilter_bpfilter_umh_start - end
is placed into .init.rodata section, so it's freed as soon as __init
function of bpfilter.ko is finished.
As part of __init the bpfilter.ko does first request/reply action
via two unix pipe provided by fork_usermode_blob() helper to
make sure that umh is healthy. If not it will kill it via pid.

Later bpfilter_process_sockopt() will be called from bpfilter hooks
in get/setsockopt() to pass iptable commands into umh via bpfilter.ko

If admin does 'rmmod bpfilter' the __exit code bpfilter.ko will
kill umh as well.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
13 files changed:
include/linux/bpfilter.h [new file with mode: 0644]
include/uapi/linux/bpfilter.h [new file with mode: 0644]
net/Kconfig
net/Makefile
net/bpfilter/Kconfig [new file with mode: 0644]
net/bpfilter/Makefile [new file with mode: 0644]
net/bpfilter/bpfilter_kern.c [new file with mode: 0644]
net/bpfilter/main.c [new file with mode: 0644]
net/bpfilter/msgfmt.h [new file with mode: 0644]
net/ipv4/Makefile
net/ipv4/bpfilter/Makefile [new file with mode: 0644]
net/ipv4/bpfilter/sockopt.c [new file with mode: 0644]
net/ipv4/ip_sockglue.c

diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h
new file mode 100644 (file)
index 0000000..687b176
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_BPFILTER_H
+#define _LINUX_BPFILTER_H
+
+#include <uapi/linux/bpfilter.h>
+
+struct sock;
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char *optval,
+                           unsigned int optlen);
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char *optval,
+                           int *optlen);
+extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
+                                      char __user *optval,
+                                      unsigned int optlen, bool is_set);
+#endif
diff --git a/include/uapi/linux/bpfilter.h b/include/uapi/linux/bpfilter.h
new file mode 100644 (file)
index 0000000..2ec3cc9
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI_LINUX_BPFILTER_H
+#define _UAPI_LINUX_BPFILTER_H
+
+#include <linux/if.h>
+
+enum {
+       BPFILTER_IPT_SO_SET_REPLACE = 64,
+       BPFILTER_IPT_SO_SET_ADD_COUNTERS = 65,
+       BPFILTER_IPT_SET_MAX,
+};
+
+enum {
+       BPFILTER_IPT_SO_GET_INFO = 64,
+       BPFILTER_IPT_SO_GET_ENTRIES = 65,
+       BPFILTER_IPT_SO_GET_REVISION_MATCH = 66,
+       BPFILTER_IPT_SO_GET_REVISION_TARGET = 67,
+       BPFILTER_IPT_GET_MAX,
+};
+
+#endif /* _UAPI_LINUX_BPFILTER_H */
index df8d45e..ba554ce 100644 (file)
@@ -202,6 +202,8 @@ source "net/bridge/netfilter/Kconfig"
 
 endif
 
+source "net/bpfilter/Kconfig"
+
 source "net/dccp/Kconfig"
 source "net/sctp/Kconfig"
 source "net/rds/Kconfig"
index 77aadde..bdaf539 100644 (file)
@@ -20,6 +20,7 @@ obj-$(CONFIG_TLS)             += tls/
 obj-$(CONFIG_XFRM)             += xfrm/
 obj-$(CONFIG_UNIX)             += unix/
 obj-$(CONFIG_NET)              += ipv6/
+obj-$(CONFIG_BPFILTER)         += bpfilter/
 obj-$(CONFIG_PACKET)           += packet/
 obj-$(CONFIG_NET_KEY)          += key/
 obj-$(CONFIG_BRIDGE)           += bridge/
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
new file mode 100644 (file)
index 0000000..60725c5
--- /dev/null
@@ -0,0 +1,16 @@
+menuconfig BPFILTER
+       bool "BPF based packet filtering framework (BPFILTER)"
+       default n
+       depends on NET && BPF
+       help
+         This builds experimental bpfilter framework that is aiming to
+         provide netfilter compatible functionality via BPF
+
+if BPFILTER
+config BPFILTER_UMH
+       tristate "bpfilter kernel module with user mode helper"
+       default m
+       help
+         This builds bpfilter kernel module with embedded user mode helper
+endif
+
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
new file mode 100644 (file)
index 0000000..2af752c
--- /dev/null
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Linux BPFILTER layer.
+#
+
+hostprogs-y := bpfilter_umh
+bpfilter_umh-objs := main.o
+HOSTCFLAGS += -I. -Itools/include/
+ifeq ($(CONFIG_BPFILTER_UMH), y)
+# builtin bpfilter_umh should be compiled with -static
+# since rootfs isn't mounted at the time of __init
+# function is called and do_execv won't find elf interpreter
+HOSTLDFLAGS += -static
+endif
+
+# a bit of elf magic to convert bpfilter_umh binary into a binary blob
+# inside bpfilter_umh.o elf file referenced by
+# _binary_net_bpfilter_bpfilter_umh_start symbol
+# which bpfilter_kern.c passes further into umh blob loader at run-time
+quiet_cmd_copy_umh = GEN $@
+      cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \
+      $(OBJCOPY) -I binary -O $(CONFIG_OUTPUT_FORMAT) \
+      -B `$(OBJDUMP) -f $<|grep architecture|cut -d, -f1|cut -d' ' -f2` \
+      --rename-section .data=.init.rodata $< $@
+
+$(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh
+       $(call cmd,copy_umh)
+
+obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o
+bpfilter-objs += bpfilter_kern.o bpfilter_umh.o
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
new file mode 100644 (file)
index 0000000..7596314
--- /dev/null
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/umh.h>
+#include <linux/bpfilter.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include "msgfmt.h"
+
+#define UMH_start _binary_net_bpfilter_bpfilter_umh_start
+#define UMH_end _binary_net_bpfilter_bpfilter_umh_end
+
+extern char UMH_start;
+extern char UMH_end;
+
+static struct umh_info info;
+/* since ip_getsockopt() can run in parallel, serialize access to umh */
+static DEFINE_MUTEX(bpfilter_lock);
+
+static void shutdown_umh(struct umh_info *info)
+{
+       struct task_struct *tsk;
+
+       tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID);
+       if (tsk)
+               force_sig(SIGKILL, tsk);
+       fput(info->pipe_to_umh);
+       fput(info->pipe_from_umh);
+}
+
+static void __stop_umh(void)
+{
+       if (bpfilter_process_sockopt) {
+               bpfilter_process_sockopt = NULL;
+               shutdown_umh(&info);
+       }
+}
+
+static void stop_umh(void)
+{
+       mutex_lock(&bpfilter_lock);
+       __stop_umh();
+       mutex_unlock(&bpfilter_lock);
+}
+
+static int __bpfilter_process_sockopt(struct sock *sk, int optname,
+                                     char __user *optval,
+                                     unsigned int optlen, bool is_set)
+{
+       struct mbox_request req;
+       struct mbox_reply reply;
+       loff_t pos;
+       ssize_t n;
+       int ret;
+
+       req.is_set = is_set;
+       req.pid = current->pid;
+       req.cmd = optname;
+       req.addr = (long)optval;
+       req.len = optlen;
+       mutex_lock(&bpfilter_lock);
+       n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos);
+       if (n != sizeof(req)) {
+               pr_err("write fail %zd\n", n);
+               __stop_umh();
+               ret = -EFAULT;
+               goto out;
+       }
+       pos = 0;
+       n = kernel_read(info.pipe_from_umh, &reply, sizeof(reply), &pos);
+       if (n != sizeof(reply)) {
+               pr_err("read fail %zd\n", n);
+               __stop_umh();
+               ret = -EFAULT;
+               goto out;
+       }
+       ret = reply.status;
+out:
+       mutex_unlock(&bpfilter_lock);
+       return ret;
+}
+
+static int __init load_umh(void)
+{
+       int err;
+
+       /* fork usermode process */
+       err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info);
+       if (err)
+               return err;
+       pr_info("Loaded bpfilter_umh pid %d\n", info.pid);
+
+       /* health check that usermode process started correctly */
+       if (__bpfilter_process_sockopt(NULL, 0, 0, 0, 0) != 0) {
+               stop_umh();
+               return -EFAULT;
+       }
+       bpfilter_process_sockopt = &__bpfilter_process_sockopt;
+       return 0;
+}
+
+static void __exit fini_umh(void)
+{
+       stop_umh();
+}
+module_init(load_umh);
+module_exit(fini_umh);
+MODULE_LICENSE("GPL");
diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c
new file mode 100644 (file)
index 0000000..81bbc16
--- /dev/null
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sys/uio.h>
+#include <errno.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "include/uapi/linux/bpf.h"
+#include <asm/unistd.h>
+#include "msgfmt.h"
+
+int debug_fd;
+
+static int handle_get_cmd(struct mbox_request *cmd)
+{
+       switch (cmd->cmd) {
+       case 0:
+               return 0;
+       default:
+               break;
+       }
+       return -ENOPROTOOPT;
+}
+
+static int handle_set_cmd(struct mbox_request *cmd)
+{
+       return -ENOPROTOOPT;
+}
+
+static void loop(void)
+{
+       while (1) {
+               struct mbox_request req;
+               struct mbox_reply reply;
+               int n;
+
+               n = read(0, &req, sizeof(req));
+               if (n != sizeof(req)) {
+                       dprintf(debug_fd, "invalid request %d\n", n);
+                       return;
+               }
+
+               reply.status = req.is_set ?
+                       handle_set_cmd(&req) :
+                       handle_get_cmd(&req);
+
+               n = write(1, &reply, sizeof(reply));
+               if (n != sizeof(reply)) {
+                       dprintf(debug_fd, "reply failed %d\n", n);
+                       return;
+               }
+       }
+}
+
+int main(void)
+{
+       debug_fd = open("/dev/console", 00000002 | 00000100);
+       dprintf(debug_fd, "Started bpfilter\n");
+       loop();
+       close(debug_fd);
+       return 0;
+}
diff --git a/net/bpfilter/msgfmt.h b/net/bpfilter/msgfmt.h
new file mode 100644 (file)
index 0000000..98d121c
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_BPFILTER_MSGFMT_H
+#define _NET_BPFILTER_MSGFMT_H
+
+struct mbox_request {
+       __u64 addr;
+       __u32 len;
+       __u32 is_set;
+       __u32 cmd;
+       __u32 pid;
+};
+
+struct mbox_reply {
+       __u32 status;
+};
+
+#endif
index b379520..7018f91 100644 (file)
@@ -16,6 +16,8 @@ obj-y     := route.o inetpeer.o protocol.o \
             inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
             metrics.o
 
+obj-$(CONFIG_BPFILTER) += bpfilter/
+
 obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
 obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
 obj-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/ipv4/bpfilter/Makefile b/net/ipv4/bpfilter/Makefile
new file mode 100644 (file)
index 0000000..ce262d7
--- /dev/null
@@ -0,0 +1,2 @@
+obj-$(CONFIG_BPFILTER) += sockopt.o
+
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
new file mode 100644 (file)
index 0000000..42a96d2
--- /dev/null
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/uaccess.h>
+#include <linux/bpfilter.h>
+#include <uapi/linux/bpf.h>
+#include <linux/wait.h>
+#include <linux/kmod.h>
+
+int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
+                               char __user *optval,
+                               unsigned int optlen, bool is_set);
+EXPORT_SYMBOL_GPL(bpfilter_process_sockopt);
+
+int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval,
+                         unsigned int optlen, bool is_set)
+{
+       if (!bpfilter_process_sockopt) {
+               int err = request_module("bpfilter");
+
+               if (err)
+                       return err;
+               if (!bpfilter_process_sockopt)
+                       return -ECHILD;
+       }
+       return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set);
+}
+
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
+                           unsigned int optlen)
+{
+       return bpfilter_mbox_request(sk, optname, optval, optlen, true);
+}
+
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
+                           int __user *optlen)
+{
+       int len;
+
+       if (get_user(len, optlen))
+               return -EFAULT;
+
+       return bpfilter_mbox_request(sk, optname, optval, len, false);
+}
index 5ad2d8e..e0791fa 100644 (file)
@@ -47,6 +47,8 @@
 #include <linux/errqueue.h>
 #include <linux/uaccess.h>
 
+#include <linux/bpfilter.h>
+
 /*
  *     SOL_IP control messages.
  */
@@ -1244,6 +1246,11 @@ int ip_setsockopt(struct sock *sk, int level,
                return -ENOPROTOOPT;
 
        err = do_ip_setsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_BPFILTER
+       if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
+           optname < BPFILTER_IPT_SET_MAX)
+               err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
+#endif
 #ifdef CONFIG_NETFILTER
        /* we need to exclude all possible ENOPROTOOPTs except default case */
        if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
@@ -1552,6 +1559,11 @@ int ip_getsockopt(struct sock *sk, int level,
        int err;
 
        err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
+#ifdef CONFIG_BPFILTER
+       if (optname >= BPFILTER_IPT_SO_GET_INFO &&
+           optname < BPFILTER_IPT_GET_MAX)
+               err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
+#endif
 #ifdef CONFIG_NETFILTER
        /* we need to exclude all possible ENOPROTOOPTs except default case */
        if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
@@ -1584,6 +1596,11 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
        err = do_ip_getsockopt(sk, level, optname, optval, optlen,
                MSG_CMSG_COMPAT);
 
+#ifdef CONFIG_BPFILTER
+       if (optname >= BPFILTER_IPT_SO_GET_INFO &&
+           optname < BPFILTER_IPT_GET_MAX)
+               err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
+#endif
 #ifdef CONFIG_NETFILTER
        /* we need to exclude all possible ENOPROTOOPTs except default case */
        if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&