1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2020 Cloudflare
4 * Test suite for SOCKMAP/SOCKHASH holding listening sockets.
6 * 1. BPF map operations - bpf_map_{update,lookup,delete}_elem
7 * 2. BPF redirect helpers - bpf_{sk,msg}_redirect_map
8 * 3. BPF reuseport helper - bpf_sk_select_reuseport
11 #include <linux/compiler.h>
15 #include <netinet/in.h>
19 #include <sys/select.h>
21 #include <linux/vm_sockets.h>
24 #include <bpf/libbpf.h>
27 #include "test_progs.h"
28 #include "test_sockmap_listen.skel.h"
30 #include "sockmap_helpers.h"
32 static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
33 int family, int sotype, int mapfd)
40 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
41 if (!err || errno != EINVAL)
42 FAIL_ERRNO("map_update: expected EINVAL");
45 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
46 if (!err || errno != EBADF)
47 FAIL_ERRNO("map_update: expected EBADF");
50 static void test_insert_opened(struct test_sockmap_listen *skel __always_unused,
51 int family, int sotype, int mapfd)
57 s = xsocket(family, sotype, 0);
63 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
64 if (sotype == SOCK_STREAM) {
65 if (!err || errno != EOPNOTSUPP)
66 FAIL_ERRNO("map_update: expected EOPNOTSUPP");
68 FAIL_ERRNO("map_update: expected success");
72 static void test_insert_bound(struct test_sockmap_listen *skel __always_unused,
73 int family, int sotype, int mapfd)
75 struct sockaddr_storage addr;
81 init_addr_loopback(family, &addr, &len);
83 s = xsocket(family, sotype, 0);
87 err = xbind(s, sockaddr(&addr), len);
93 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
94 if (!err || errno != EOPNOTSUPP)
95 FAIL_ERRNO("map_update: expected EOPNOTSUPP");
100 static void test_insert(struct test_sockmap_listen *skel __always_unused,
101 int family, int sotype, int mapfd)
107 s = socket_loopback(family, sotype);
113 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
117 static void test_delete_after_insert(struct test_sockmap_listen *skel __always_unused,
118 int family, int sotype, int mapfd)
124 s = socket_loopback(family, sotype);
130 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
131 xbpf_map_delete_elem(mapfd, &key);
/* Insert a loopback socket into the map, then check that deleting its key
 * is rejected. The delete must fail with either EINVAL or ENOENT; which one
 * depends on the map type (see the comment on the check below).
 * NOTE(review): the function name indicates the socket is closed between the
 * insert and the delete; that step is not among the lines visible here.
 */
135 static void test_delete_after_close(struct test_sockmap_listen *skel __always_unused,
136 int family, int sotype, int mapfd)
142 s = socket_loopback(family, sotype);
148 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
153 err = bpf_map_delete_elem(mapfd, &key);
154 if (!err || (errno != EINVAL && errno != ENOENT))
155 /* SOCKMAP and SOCKHASH return different error codes */
156 FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
159 static void test_lookup_after_insert(struct test_sockmap_listen *skel __always_unused,
160 int family, int sotype, int mapfd)
167 s = socket_loopback(family, sotype);
173 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
175 len = sizeof(cookie);
176 xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len);
178 xbpf_map_lookup_elem(mapfd, &key, &value);
180 if (value != cookie) {
181 FAIL("map_lookup: have %#llx, want %#llx",
182 (unsigned long long)value, (unsigned long long)cookie);
188 static void test_lookup_after_delete(struct test_sockmap_listen *skel __always_unused,
189 int family, int sotype, int mapfd)
195 s = socket_loopback(family, sotype);
201 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
202 xbpf_map_delete_elem(mapfd, &key);
205 err = bpf_map_lookup_elem(mapfd, &key, &value);
206 if (!err || errno != ENOENT)
207 FAIL_ERRNO("map_lookup: expected ENOENT");
212 static void test_lookup_32_bit_value(struct test_sockmap_listen *skel __always_unused,
213 int family, int sotype, int mapfd)
218 s = socket_loopback(family, sotype);
222 mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key),
223 sizeof(value32), 1, NULL);
225 FAIL_ERRNO("map_create");
231 xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST);
234 err = bpf_map_lookup_elem(mapfd, &key, &value32);
235 if (!err || errno != ENOSPC)
236 FAIL_ERRNO("map_lookup: expected ENOSPC");
243 static void test_update_existing(struct test_sockmap_listen *skel __always_unused,
244 int family, int sotype, int mapfd)
250 s1 = socket_loopback(family, sotype);
254 s2 = socket_loopback(family, sotype);
260 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
263 xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST);
269 /* Exercise the code path where we destroy child sockets that never
270 * got accept()'ed, aka orphans, when parent socket gets closed.
272 static void do_destroy_orphan_child(int family, int sotype, int mapfd)
274 struct sockaddr_storage addr;
280 s = socket_loopback(family, sotype);
285 err = xgetsockname(s, sockaddr(&addr), &len);
291 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
293 c = xsocket(family, sotype, 0);
297 xconnect(c, sockaddr(&addr), len);
303 static void test_destroy_orphan_child(struct test_sockmap_listen *skel,
304 int family, int sotype, int mapfd)
306 int msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
307 int skb_verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
310 enum bpf_attach_type atype;
313 { msg_verdict, BPF_SK_MSG_VERDICT },
314 { skb_verdict, BPF_SK_SKB_VERDICT },
316 const struct test *t;
318 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
319 if (t->progfd != -1 &&
320 xbpf_prog_attach(t->progfd, mapfd, t->atype, 0) != 0)
323 do_destroy_orphan_child(family, sotype, mapfd);
326 xbpf_prog_detach2(t->progfd, mapfd, t->atype);
330 /* Perform a passive open after removing listening socket from SOCKMAP
331 * to ensure that callbacks get restored properly.
333 static void test_clone_after_delete(struct test_sockmap_listen *skel __always_unused,
334 int family, int sotype, int mapfd)
336 struct sockaddr_storage addr;
342 s = socket_loopback(family, sotype);
347 err = xgetsockname(s, sockaddr(&addr), &len);
353 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
354 xbpf_map_delete_elem(mapfd, &key);
356 c = xsocket(family, sotype, 0);
360 xconnect(c, sockaddr(&addr), len);
366 /* Check that child socket that got created while parent was in a
367 * SOCKMAP, but got accept()'ed only after the parent has been removed
368 * from SOCKMAP, gets cloned without parent psock state or callbacks.
370 static void test_accept_after_delete(struct test_sockmap_listen *skel __always_unused,
371 int family, int sotype, int mapfd)
373 struct sockaddr_storage addr;
379 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
384 err = xgetsockname(s, sockaddr(&addr), &len);
389 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
393 c = xsocket(family, sotype, 0);
397 /* Create child while parent is in sockmap */
398 err = xconnect(c, sockaddr(&addr), len);
402 /* Remove parent from sockmap */
403 err = xbpf_map_delete_elem(mapfd, &zero);
407 p = xaccept_nonblock(s, NULL, NULL);
411 /* Check that child sk_user_data is not set */
413 xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
422 /* Check that child socket that got created and accepted while parent
423 * was in a SOCKMAP is cloned without parent psock state or callbacks.
425 static void test_accept_before_delete(struct test_sockmap_listen *skel __always_unused,
426 int family, int sotype, int mapfd)
428 struct sockaddr_storage addr;
429 const u32 zero = 0, one = 1;
434 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
439 err = xgetsockname(s, sockaddr(&addr), &len);
444 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
448 c = xsocket(family, sotype, 0);
452 /* Create & accept child while parent is in sockmap */
453 err = xconnect(c, sockaddr(&addr), len);
457 p = xaccept_nonblock(s, NULL, NULL);
461 /* Check that child sk_user_data is not set */
463 xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST);
472 struct connect_accept_ctx {
475 unsigned int nr_iter;
478 static bool is_thread_done(struct connect_accept_ctx *ctx)
480 return READ_ONCE(ctx->done);
483 static void *connect_accept_thread(void *arg)
485 struct connect_accept_ctx *ctx = arg;
486 struct sockaddr_storage addr;
487 int family, socktype;
494 err = xgetsockname(s, sockaddr(&addr), &len);
498 len = sizeof(family);
499 err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len);
503 len = sizeof(socktype);
504 err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len);
508 for (i = 0; i < ctx->nr_iter; i++) {
511 c = xsocket(family, socktype, 0);
515 err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr));
521 p = xaccept_nonblock(s, NULL, NULL);
531 WRITE_ONCE(ctx->done, 1);
535 static void test_syn_recv_insert_delete(struct test_sockmap_listen *skel __always_unused,
536 int family, int sotype, int mapfd)
538 struct connect_accept_ctx ctx = { 0 };
539 struct sockaddr_storage addr;
546 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
551 err = xgetsockname(s, sockaddr(&addr), &len);
558 err = xpthread_create(&t, NULL, connect_accept_thread, &ctx);
563 while (!is_thread_done(&ctx)) {
564 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
568 err = xbpf_map_delete_elem(mapfd, &zero);
573 xpthread_join(t, NULL);
578 static void *listen_thread(void *arg)
580 struct sockaddr unspec = { AF_UNSPEC };
581 struct connect_accept_ctx *ctx = arg;
586 for (i = 0; i < ctx->nr_iter; i++) {
590 err = xconnect(s, &unspec, sizeof(unspec));
595 WRITE_ONCE(ctx->done, 1);
599 static void test_race_insert_listen(struct test_sockmap_listen *skel __always_unused,
600 int family, int socktype, int mapfd)
602 struct connect_accept_ctx ctx = { 0 };
609 s = xsocket(family, socktype, 0);
613 err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
620 err = pthread_create(&t, NULL, listen_thread, &ctx);
625 while (!is_thread_done(&ctx)) {
626 err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
627 /* Expecting EOPNOTSUPP before listen() */
628 if (err && errno != EOPNOTSUPP) {
629 FAIL_ERRNO("map_update");
633 err = bpf_map_delete_elem(mapfd, &zero);
634 /* Expecting no entry after unhash on connect(AF_UNSPEC) */
635 if (err && errno != EINVAL && errno != ENOENT) {
636 FAIL_ERRNO("map_delete");
641 xpthread_join(t, NULL);
646 static void zero_verdict_count(int mapfd)
648 unsigned int zero = 0;
652 xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
654 xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
662 static const char *redir_mode_str(enum redir_mode mode)
674 static void redir_to_connected(int family, int sotype, int sock_mapfd,
675 int verd_mapfd, enum redir_mode mode)
677 const char *log_prefix = redir_mode_str(mode);
678 int s, c0, c1, p0, p1;
684 zero_verdict_count(verd_mapfd);
686 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
690 err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
694 err = add_to_sockmap(sock_mapfd, p0, p1);
698 n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
700 FAIL_ERRNO("%s: write", log_prefix);
702 FAIL("%s: incomplete write", log_prefix);
707 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
711 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
712 n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
714 FAIL_ERRNO("%s: recv_timeout", log_prefix);
716 FAIL("%s: incomplete recv", log_prefix);
727 static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
728 struct bpf_map *inner_map, int family,
731 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
732 int parser = bpf_program__fd(skel->progs.prog_stream_parser);
733 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
734 int sock_map = bpf_map__fd(inner_map);
737 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
740 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
744 redir_to_connected(family, sotype, sock_map, verdict_map,
747 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
749 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
752 static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
753 struct bpf_map *inner_map, int family,
756 int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
757 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
758 int sock_map = bpf_map__fd(inner_map);
761 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
765 redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
767 xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
770 static void redir_to_listening(int family, int sotype, int sock_mapfd,
771 int verd_mapfd, enum redir_mode mode)
773 const char *log_prefix = redir_mode_str(mode);
774 struct sockaddr_storage addr;
780 zero_verdict_count(verd_mapfd);
782 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
787 err = xgetsockname(s, sockaddr(&addr), &len);
791 c = xsocket(family, sotype, 0);
794 err = xconnect(c, sockaddr(&addr), len);
798 p = xaccept_nonblock(s, NULL, NULL);
802 err = add_to_sockmap(sock_mapfd, s, p);
806 n = write(mode == REDIR_INGRESS ? c : p, "a", 1);
807 if (n < 0 && errno != EACCES)
808 FAIL_ERRNO("%s: write", log_prefix);
810 FAIL("%s: incomplete write", log_prefix);
815 err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop);
819 FAIL("%s: want drop count 1, have %d", log_prefix, drop);
829 static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
830 struct bpf_map *inner_map, int family,
833 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
834 int parser = bpf_program__fd(skel->progs.prog_stream_parser);
835 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
836 int sock_map = bpf_map__fd(inner_map);
839 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
842 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
846 redir_to_listening(family, sotype, sock_map, verdict_map,
849 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
851 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
854 static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
855 struct bpf_map *inner_map, int family,
858 int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
859 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
860 int sock_map = bpf_map__fd(inner_map);
863 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
867 redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
869 xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
872 static void redir_partial(int family, int sotype, int sock_map, int parser_map)
874 int s, c0, c1, p0, p1;
875 int err, n, key, value;
879 value = sizeof(buf) - 1;
880 err = xbpf_map_update_elem(parser_map, &key, &value, 0);
884 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
886 goto clean_parser_map;
888 err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
892 err = add_to_sockmap(sock_map, p0, p1);
896 n = xsend(c1, buf, sizeof(buf), 0);
898 FAIL("incomplete write");
900 n = xrecv_nonblock(c0, buf, sizeof(buf), 0);
901 if (n != sizeof(buf) - 1)
902 FAIL("expect %zu, received %d", sizeof(buf) - 1, n);
915 xbpf_map_update_elem(parser_map, &key, &value, 0);
918 static void test_skb_redir_partial(struct test_sockmap_listen *skel,
919 struct bpf_map *inner_map, int family,
922 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
923 int parser = bpf_program__fd(skel->progs.prog_stream_parser);
924 int parser_map = bpf_map__fd(skel->maps.parser_map);
925 int sock_map = bpf_map__fd(inner_map);
928 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
932 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
936 redir_partial(family, sotype, sock_map, parser_map);
938 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
940 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
943 static void test_reuseport_select_listening(int family, int sotype,
944 int sock_map, int verd_map,
947 struct sockaddr_storage addr;
954 zero_verdict_count(verd_map);
956 s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
962 err = xgetsockname(s, sockaddr(&addr), &len);
968 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
972 c = xsocket(family, sotype, 0);
975 err = xconnect(c, sockaddr(&addr), len);
979 if (sotype == SOCK_STREAM) {
982 p = xaccept_nonblock(s, NULL, NULL);
990 n = xsend(c, &b, sizeof(b), 0);
994 n = xrecv_nonblock(s, &b, sizeof(b), 0);
1000 err = xbpf_map_lookup_elem(verd_map, &key, &pass);
1004 FAIL("want pass count 1, have %d", pass);
1012 static void test_reuseport_select_connected(int family, int sotype,
1013 int sock_map, int verd_map,
1016 struct sockaddr_storage addr;
1017 int s, c0, c1, p0, err;
1023 zero_verdict_count(verd_map);
1025 s = socket_loopback_reuseport(family, sotype, reuseport_prog);
1029 /* Populate sock_map[0] to avoid ENOENT on first connection */
1032 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
1037 err = xgetsockname(s, sockaddr(&addr), &len);
1041 c0 = xsocket(family, sotype, 0);
1045 err = xconnect(c0, sockaddr(&addr), len);
1049 if (sotype == SOCK_STREAM) {
1050 p0 = xaccept_nonblock(s, NULL, NULL);
1054 p0 = xsocket(family, sotype, 0);
1059 err = xgetsockname(c0, sockaddr(&addr), &len);
1063 err = xconnect(p0, sockaddr(&addr), len);
1068 /* Update sock_map[0] to redirect to a connected socket */
1071 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
1075 c1 = xsocket(family, sotype, 0);
1080 err = xgetsockname(s, sockaddr(&addr), &len);
1085 err = connect(c1, sockaddr(&addr), len);
1086 if (sotype == SOCK_DGRAM) {
1090 n = xsend(c1, &b, sizeof(b), 0);
1094 n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
1097 if (!err || errno != ECONNREFUSED)
1098 FAIL_ERRNO("connect: expected ECONNREFUSED");
1101 err = xbpf_map_lookup_elem(verd_map, &key, &drop);
1105 FAIL("want drop count 1, have %d", drop);
1117 /* Check that redirecting across reuseport groups is not allowed. */
1118 static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
1119 int verd_map, int reuseport_prog)
1121 struct sockaddr_storage addr;
1127 zero_verdict_count(verd_map);
1129 /* Create two listeners, each in its own reuseport group */
1130 s1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
1134 s2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
1138 err = add_to_sockmap(sock_map, s1, s2);
1142 /* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
1144 err = xgetsockname(s2, sockaddr(&addr), &len);
1148 c = xsocket(family, sotype, 0);
1152 err = connect(c, sockaddr(&addr), len);
1153 if (sotype == SOCK_DGRAM) {
1157 n = xsend(c, &b, sizeof(b), 0);
1161 n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
1164 if (!err || errno != ECONNREFUSED) {
1165 FAIL_ERRNO("connect: expected ECONNREFUSED");
1169 /* Expect drop, can't redirect outside of reuseport group */
1171 err = xbpf_map_lookup_elem(verd_map, &key, &drop);
1175 FAIL("want drop count 1, have %d", drop);
1185 #define TEST(fn, ...) \
1187 fn, #fn, __VA_ARGS__ \
1190 static void test_ops_cleanup(const struct bpf_map *map)
1195 mapfd = bpf_map__fd(map);
1197 for (key = 0; key < bpf_map__max_entries(map); key++) {
1198 err = bpf_map_delete_elem(mapfd, &key);
1199 if (err && errno != EINVAL && errno != ENOENT)
1200 FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
1204 static const char *family_str(sa_family_t family)
1220 static const char *map_type_str(const struct bpf_map *map)
1226 type = bpf_map__type(map);
1229 case BPF_MAP_TYPE_SOCKMAP:
1231 case BPF_MAP_TYPE_SOCKHASH:
1238 static const char *sotype_str(int sotype)
1250 static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
1251 int family, int sotype)
1253 const struct op_test {
1254 void (*fn)(struct test_sockmap_listen *skel,
1255 int family, int sotype, int mapfd);
1260 TEST(test_insert_invalid),
1261 TEST(test_insert_opened),
1262 TEST(test_insert_bound, SOCK_STREAM),
1265 TEST(test_delete_after_insert),
1266 TEST(test_delete_after_close),
1268 TEST(test_lookup_after_insert),
1269 TEST(test_lookup_after_delete),
1270 TEST(test_lookup_32_bit_value),
1272 TEST(test_update_existing),
1273 /* races with insert/delete */
1274 TEST(test_destroy_orphan_child, SOCK_STREAM),
1275 TEST(test_syn_recv_insert_delete, SOCK_STREAM),
1276 TEST(test_race_insert_listen, SOCK_STREAM),
1278 TEST(test_clone_after_delete, SOCK_STREAM),
1279 TEST(test_accept_after_delete, SOCK_STREAM),
1280 TEST(test_accept_before_delete, SOCK_STREAM),
1282 const char *family_name, *map_name, *sotype_name;
1283 const struct op_test *t;
1284 char s[MAX_TEST_NAME];
1287 family_name = family_str(family);
1288 map_name = map_type_str(map);
1289 sotype_name = sotype_str(sotype);
1290 map_fd = bpf_map__fd(map);
1292 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1293 snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1294 sotype_name, t->name);
1296 if (t->sotype != 0 && t->sotype != sotype)
1299 if (!test__start_subtest(s))
1302 t->fn(skel, family, sotype, map_fd);
1303 test_ops_cleanup(map);
1307 static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1308 int family, int sotype)
1310 const struct redir_test {
1311 void (*fn)(struct test_sockmap_listen *skel,
1312 struct bpf_map *map, int family, int sotype);
1315 TEST(test_skb_redir_to_connected),
1316 TEST(test_skb_redir_to_listening),
1317 TEST(test_skb_redir_partial),
1318 TEST(test_msg_redir_to_connected),
1319 TEST(test_msg_redir_to_listening),
1321 const char *family_name, *map_name;
1322 const struct redir_test *t;
1323 char s[MAX_TEST_NAME];
1325 family_name = family_str(family);
1326 map_name = map_type_str(map);
1328 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1329 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
1332 if (!test__start_subtest(s))
1335 t->fn(skel, map, family, sotype);
1339 static void unix_redir_to_connected(int sotype, int sock_mapfd,
1340 int verd_mapfd, enum redir_mode mode)
1342 const char *log_prefix = redir_mode_str(mode);
1350 zero_verdict_count(verd_mapfd);
1352 if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1354 c0 = sfd[0], p0 = sfd[1];
1356 if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1358 c1 = sfd[0], p1 = sfd[1];
1360 err = add_to_sockmap(sock_mapfd, p0, p1);
1364 n = write(c1, "a", 1);
1366 FAIL_ERRNO("%s: write", log_prefix);
1368 FAIL("%s: incomplete write", log_prefix);
1373 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1377 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1379 n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1381 FAIL_ERRNO("%s: recv_timeout", log_prefix);
1383 FAIL("%s: incomplete recv", log_prefix);
1393 static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1394 struct bpf_map *inner_map, int sotype)
1396 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1397 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1398 int sock_map = bpf_map__fd(inner_map);
1401 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1405 skel->bss->test_ingress = false;
1406 unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
1407 skel->bss->test_ingress = true;
1408 unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
1410 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1413 static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1416 const char *family_name, *map_name;
1417 char s[MAX_TEST_NAME];
1419 family_name = family_str(AF_UNIX);
1420 map_name = map_type_str(map);
1421 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1422 if (!test__start_subtest(s))
1424 unix_skb_redir_to_connected(skel, map, sotype);
1427 /* Returns two connected loopback vsock sockets */
1428 static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
1430 struct sockaddr_storage addr;
1431 socklen_t len = sizeof(addr);
1434 s = socket_loopback(AF_VSOCK, sotype);
1438 c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0);
1442 if (getsockname(s, sockaddr(&addr), &len) < 0)
1445 if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) {
1446 FAIL_ERRNO("connect");
1451 p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC);
1455 if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
1456 FAIL_ERRNO("poll_connect");
1475 static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
1476 enum redir_mode mode, int sotype)
1478 const char *log_prefix = redir_mode_str(mode);
1479 char a = 'a', b = 'b';
1486 zero_verdict_count(verd_mapfd);
1488 if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd))
1494 err = vsock_socketpair_connectible(sotype, &v0, &v1);
1496 FAIL("vsock_socketpair_connectible() failed");
1500 err = add_to_sockmap(sock_mapfd, u0, v0);
1502 FAIL("add_to_sockmap failed");
1506 n = write(v1, &a, sizeof(a));
1508 FAIL_ERRNO("%s: write", log_prefix);
1510 FAIL("%s: incomplete write", log_prefix);
1514 n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
1516 FAIL("%s: recv() err, errno=%d", log_prefix, errno);
1518 FAIL("%s: incomplete recv", log_prefix);
1520 FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b);
1523 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1527 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1530 bpf_map_delete_elem(sock_mapfd, &key);
1532 bpf_map_delete_elem(sock_mapfd, &key);
1543 static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel,
1544 struct bpf_map *inner_map,
1547 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1548 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1549 int sock_map = bpf_map__fd(inner_map);
1552 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1556 skel->bss->test_ingress = false;
1557 vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype);
1558 skel->bss->test_ingress = true;
1559 vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype);
1561 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1564 static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map)
1566 const char *family_name, *map_name;
1567 char s[MAX_TEST_NAME];
1569 family_name = family_str(AF_VSOCK);
1570 map_name = map_type_str(map);
1571 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1572 if (!test__start_subtest(s))
1575 vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM);
1576 vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET);
1579 static void test_reuseport(struct test_sockmap_listen *skel,
1580 struct bpf_map *map, int family, int sotype)
1582 const struct reuseport_test {
1583 void (*fn)(int family, int sotype, int socket_map,
1584 int verdict_map, int reuseport_prog);
1588 TEST(test_reuseport_select_listening),
1589 TEST(test_reuseport_select_connected),
1590 TEST(test_reuseport_mixed_groups),
1592 int socket_map, verdict_map, reuseport_prog;
1593 const char *family_name, *map_name, *sotype_name;
1594 const struct reuseport_test *t;
1595 char s[MAX_TEST_NAME];
1597 family_name = family_str(family);
1598 map_name = map_type_str(map);
1599 sotype_name = sotype_str(sotype);
1601 socket_map = bpf_map__fd(map);
1602 verdict_map = bpf_map__fd(skel->maps.verdict_map);
1603 reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
1605 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1606 snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1607 sotype_name, t->name);
1609 if (t->sotype != 0 && t->sotype != sotype)
1612 if (!test__start_subtest(s))
1615 t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
1619 static int inet_socketpair(int family, int type, int *s, int *c)
1621 struct sockaddr_storage addr;
1626 p0 = socket_loopback(family, type | SOCK_NONBLOCK);
1631 err = xgetsockname(p0, sockaddr(&addr), &len);
1635 c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
1640 err = xconnect(c0, sockaddr(&addr), len);
1643 err = xgetsockname(c0, sockaddr(&addr), &len);
1646 err = xconnect(p0, sockaddr(&addr), len);
1661 static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
1662 enum redir_mode mode)
1664 const char *log_prefix = redir_mode_str(mode);
1671 zero_verdict_count(verd_mapfd);
1673 err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1676 err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1680 err = add_to_sockmap(sock_mapfd, p0, p1);
1684 n = write(c1, "a", 1);
1686 FAIL_ERRNO("%s: write", log_prefix);
1688 FAIL("%s: incomplete write", log_prefix);
1693 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1697 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1699 n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1701 FAIL_ERRNO("%s: recv_timeout", log_prefix);
1703 FAIL("%s: incomplete recv", log_prefix);
1713 static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
1714 struct bpf_map *inner_map, int family)
1716 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1717 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1718 int sock_map = bpf_map__fd(inner_map);
1721 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1725 skel->bss->test_ingress = false;
1726 udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
1727 skel->bss->test_ingress = true;
1728 udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
1730 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1733 static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1736 const char *family_name, *map_name;
1737 char s[MAX_TEST_NAME];
1739 family_name = family_str(family);
1740 map_name = map_type_str(map);
1741 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1742 if (!test__start_subtest(s))
1744 udp_skb_redir_to_connected(skel, map, family);
/* Redirect test between an AF_UNIX pair and an inet pair.
 *
 * Visible steps: reset the verdict counters, create a non-blocking AF_UNIX
 * socketpair (c0/p0) and an inet pair of the given family (p1/c1), add p0
 * and p1 to the sockmap, write one byte on c1, expect the verdict map to
 * report a pass count of 1, then read the byte back with a timeout from p0
 * (REDIR_INGRESS) or c0 (any other mode).
 *
 * NOTE(review): 'type' is not referenced on any visible line - both socket
 * creations hard-code SOCK_DGRAM even though callers also pass SOCK_STREAM.
 * Confirm whether one of them is meant to use 'type'.
 */
1747 static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
1748 int verd_mapfd, enum redir_mode mode)
1750 const char *log_prefix = redir_mode_str(mode);
1758 zero_verdict_count(verd_mapfd);
1760 if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
1762 c0 = sfd[0], p0 = sfd[1];
1764 err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1768 err = add_to_sockmap(sock_mapfd, p0, p1);
1772 n = write(c1, "a", 1);
1774 FAIL_ERRNO("%s: write", log_prefix);
1776 FAIL("%s: incomplete write", log_prefix);
1781 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1785 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1787 n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1789 FAIL_ERRNO("%s: recv_timeout", log_prefix);
1791 FAIL("%s: incomplete recv", log_prefix);
1801 static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1802 struct bpf_map *inner_map, int family)
1804 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1805 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1806 int sock_map = bpf_map__fd(inner_map);
1809 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1813 skel->bss->test_ingress = false;
1814 inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1816 inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1818 skel->bss->test_ingress = true;
1819 inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1821 inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1824 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
/* Redirect test between an inet pair and an AF_UNIX pair.
 *
 * Visible steps: reset the verdict counters, create an inet pair of the
 * given family (p0/c0) and a non-blocking AF_UNIX socketpair (c1/p1), add
 * p0 and p1 to the sockmap, write one byte on c1, expect the verdict map to
 * report a pass count of 1, then read the byte back with a timeout from p0
 * (REDIR_INGRESS) or c0 (any other mode).
 *
 * NOTE(review): 'type' is not referenced on any visible line - both socket
 * creations hard-code SOCK_DGRAM even though callers also pass SOCK_STREAM.
 * Confirm whether one of them is meant to use 'type'.
 */
1827 static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
1828 int verd_mapfd, enum redir_mode mode)
1830 const char *log_prefix = redir_mode_str(mode);
1838 zero_verdict_count(verd_mapfd);
1840 err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1844 if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
1846 c1 = sfd[0], p1 = sfd[1];
1848 err = add_to_sockmap(sock_mapfd, p0, p1);
1852 n = write(c1, "a", 1);
1854 FAIL_ERRNO("%s: write", log_prefix);
1856 FAIL("%s: incomplete write", log_prefix);
1861 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1865 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1867 n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1869 FAIL_ERRNO("%s: recv_timeout", log_prefix);
1871 FAIL("%s: incomplete recv", log_prefix);
1882 static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
1883 struct bpf_map *inner_map, int family)
1885 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1886 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1887 int sock_map = bpf_map__fd(inner_map);
1890 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1894 skel->bss->test_ingress = false;
1895 unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1897 unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1899 skel->bss->test_ingress = true;
1900 unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1902 unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1905 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1908 static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1911 const char *family_name, *map_name;
1912 char s[MAX_TEST_NAME];
1914 family_name = family_str(family);
1915 map_name = map_type_str(map);
1916 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1917 if (!test__start_subtest(s))
1919 inet_unix_skb_redir_to_connected(skel, map, family);
1920 unix_inet_skb_redir_to_connected(skel, map, family);
1923 static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
1926 test_ops(skel, map, family, SOCK_STREAM);
1927 test_ops(skel, map, family, SOCK_DGRAM);
1928 test_redir(skel, map, family, SOCK_STREAM);
1929 test_reuseport(skel, map, family, SOCK_STREAM);
1930 test_reuseport(skel, map, family, SOCK_DGRAM);
1931 test_udp_redir(skel, map, family);
1932 test_udp_unix_redir(skel, map, family);
1935 void serial_test_sockmap_listen(void)
1937 struct test_sockmap_listen *skel;
1939 skel = test_sockmap_listen__open_and_load();
1941 FAIL("skeleton open/load failed");
1945 skel->bss->test_sockmap = true;
1946 run_tests(skel, skel->maps.sock_map, AF_INET);
1947 run_tests(skel, skel->maps.sock_map, AF_INET6);
1948 test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
1949 test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
1950 test_vsock_redir(skel, skel->maps.sock_map);
1952 skel->bss->test_sockmap = false;
1953 run_tests(skel, skel->maps.sock_hash, AF_INET);
1954 run_tests(skel, skel->maps.sock_hash, AF_INET6);
1955 test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
1956 test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
1957 test_vsock_redir(skel, skel->maps.sock_hash);
1959 test_sockmap_listen__destroy(skel);