// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

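/* Cache the received dst on the socket so the established fast path can
 * reuse it; the cookie detects stale IPv6 routes (see the dst check in
 * tcp_v6_do_rcv()).
 */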
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		rcu_assign_pointer(sk->sk_rx_dst, dst);
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

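/* Active open. Validates the destination, falls back to tcp_v4_connect()
 * for v4-mapped destinations, routes the flow, binds a local port via
 * inet6_hash_connect() and finally sends the SYN through tcp_connect().
 */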
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_timewait_death_row *tcp_death_row;
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;

			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If an interface was set while binding, the indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connecting to a link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;
	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

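/* Apply a PMTU reduction. Called directly from tcp_v6_err() when the socket
 * is not owned by user context, or deferred to tcp_release_cb() via the
 * TCP_MTU_REDUCED_DEFERRED flag otherwise.
 */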
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

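/* ICMPv6 error handler: look up the socket the offending packet refers to
 * and react to redirects, PMTU messages and hard errors. TIME_WAIT and
 * request sockets are handled early, before the socket lock is taken.
 */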
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always <576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}

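/* Send a SYN-ACK for a pending request. The route is either supplied by the
 * caller or looked up here via inet6_csk_route_req().
 */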
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
			       tclass, sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	consume_skb(inet_rsk(req)->pktopts);
}

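/* TCP-MD5 (RFC 2385) support. Keys are kept per (address, prefix, L3 index)
 * tuple; peers behind v4-mapped IPv6 addresses share their keys with the
 * AF_INET side.
 */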
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	int l3index = 0;
	u8 prefixlen;
	u8 flags;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index, flags);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index, flags);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, l3index, flags,
				      cmd.tcpm_key, cmd.tcpm_keylen,
				      GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, l3index, flags,
			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

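/* Feed the IPv6 pseudo-header (RFC 2460 layout) plus the TCP header, with
 * its checksum field zeroed, into the MD5 transform.
 */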
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);

		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

#endif

static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb,
				    int dif, int sdif,
				    enum skb_drop_reason *reason)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash, l3index;
	u8 newhash[16];

	/* sdif set, means packet ingressed via a device
	 * in an L3 domain and dif is set to the l3mdev
	 */
	l3index = sdif ? dif : 0;

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		*reason = SKB_DROP_REASON_TCP_MD5NOTFOUND;
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		*reason = SKB_DROP_REASON_TCP_MD5UNEXPECTED;
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		*reason = SKB_DROP_REASON_TCP_MD5FAILURE;
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest), l3index);
		return true;
	}
#endif
	return false;
}

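/* Fill the IPv6-specific fields of a new request sock from the incoming SYN
 * and, if the listener asked for packet options, keep a reference to the SYN
 * itself in ireq->pktopts.
 */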
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock *req)
{
	tcp_v6_init_req(req, sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;

	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	__u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	if (rst && !key) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}

	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_TCP_HEADER);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT) {
			mark = inet_twsk(sk)->tw_mark;
			/* autoflowlabel relies on buff->hash */
			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
				     PKT_HASH_TYPE_L4);
		} else {
			mark = sk->sk_mark;
		}
		buff->tstamp = tcp_transmit_time(sk);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup_flow even when it is for a RST.
	 * The underlying function will use it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass & ~INET_ECN_MASK, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

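/* Send a RST in reply to skb. When there is no socket (e.g. no listener),
 * a TCP-MD5 key is looked up via a matching listener, and no RST is sent at
 * all if the segment carries an MD5 option that does not verify.
 */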
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk is not NULL, it means we did a successful lookup and the
	 * incoming route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
	} else if (hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket. We are not losing security here:
		 * the incoming packet is checked with the md5 hash of the
		 * found key; no RST is generated if the md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net,
					   &tcp_hashinfo, NULL, 0,
					   &ipv6h->saddr,
					   th->source, &ipv6h->daddr,
					   ntohs(th->source), dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
			     ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	int l3index;

	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 *
	 * RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

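/* Create the child socket once the handshake completes. The ETH_P_IP branch
 * handles v4-mapped connections accepted on an IPv6 listener: the child is
 * built by tcp_v4_syn_recv_sock() and then switched to the ipv6_mapped ops.
 */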
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever, but we do
	   one more thing here: we reattach the optmem to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							    u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but that is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, this is the only place in our code where we
	   may make it not affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS
	   does not look very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb_reason(skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* What is this, you may ask:

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

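/* Main receive entry point, called from the IPv6 protocol handler for every
 * TCP segment. Performs the socket lookup, dispatches NEW_SYN_RECV and
 * TIME_WAIT handling, and otherwise feeds the segment to tcp_v6_do_rcv()
 * directly or through the backlog, depending on who owns the socket.
 */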
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason drop_reason;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		goto bad_packet;
	}
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif,
					    &drop_reason)) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		} else {
			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif, &drop_reason))
		goto discard_and_relse;

	if (tcp_filter(sk, skb)) {
		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		goto discard_and_relse;
	}
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	sk_defer_free_flush(sk);
	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb_reason(skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);

			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

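/* Early demux: opportunistically match an established socket while the
 * packet is still in the IPv6 input path, so that its cached rx dst can be
 * reused and the full lookup in tcp_v6_rcv() is skipped.
 */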
INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, sk->sk_rx_dst_cookie);
			if (dst &&
			    sk->sk_rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}

const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

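/* Each /proc/net/tcp6 entry follows the IPv4 format: slot, hex-encoded
 * local and remote address:port, state, tx/rx queue sizes, timer state,
 * retransmit count, uid, timeouts and inode, then internal socket details.
 */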
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

/* thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
static struct inet6_protocol tcpv6_protocol = {
	.early_demux	=	tcp_v6_early_demux,
	.early_demux_handler =  tcp_v6_early_demux,
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}