ipv6: tcp: consistently use MAX_TCP_HEADER
net/ipv6/tcp_ipv6.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support the IPV6_V6ONLY socket option,
 *      Alexey Kuznetsov                which allows both IPv4 and IPv6 sockets
 *                                      to bind to a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

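/* Cache the just-received dst (plus its ifindex and RCU cookie) on the
 * socket so the receive path can validate and reuse it instead of doing
 * a route lookup per packet.
 */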
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                rcu_assign_pointer(sk->sk_rx_dst, dst);
                sk->sk_rx_dst_ifindex = skb->skb_iif;
                sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
        }
}

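/* Initial sequence numbers and timestamp offsets are derived from the
 * {saddr, daddr, sport, dport} tuple with keyed hashes (secure_seq), so
 * they are hard for an off-path attacker to predict.
 */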
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and intended to
         * prevent the BPF program called below from accessing bytes that are
         * outside the bound specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_timewait_death_row *tcp_death_row;
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct ipv6_txoptions *opt;
        struct flowi6 fl6;
        struct dst_entry *dst;
        int addr_type;
        int err;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (np->sndflow) {
                fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;

                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type & IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If interface is set while binding, indices
                         * must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connecting to a link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */

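        /* Illustrative: a connect() to a v4-mapped destination such as
         * ::ffff:192.0.2.1 takes the branch below and is delegated to
         * tcp_v4_connect(), with the socket's af_ops switched to the
         * mapped set.
         */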
        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                icsk->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        icsk->icsk_af_ops = &ipv6_specific;
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        if (!saddr) {
                saddr = &fl6.saddr;
                sk->sk_v6_rcv_saddr = *saddr;
        }

        /* set the source address */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
                                                   np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

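/* An ICMPv6 "Packet Too Big" stashes the new MTU in tp->mtu_info; this
 * runs either directly from tcp_v6_err() or deferred to tcp_release_cb()
 * via the TCP_MTU_REDUCED_DEFERRED flag when userspace owns the socket.
 */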
static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * Check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, &tcp_hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (static_branch_unlikely(&ip6_min_hopcount)) {
                /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
                if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
                        __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                        goto out;
                }
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

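        /* A Neighbour Discovery redirect lets the routing code update our
         * cached dst; only applied when userspace does not own the socket.
         */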
        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

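        /* PMTU discovery: record the new MTU with WRITE_ONCE() (it is read
         * locklessly via READ_ONCE() in tcp_v6_mtu_reduced()) and defer the
         * MSS update when the socket is owned by userspace.
         */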
        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }

        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
                        sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* Check whether this ICMP message allows a revert of the
                 * backoff (see RFC 6069).
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}

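/* Build and transmit the SYN-ACK for a request socket. The traffic class
 * may reflect the incoming SYN's TOS when sysctl_tcp_reflect_tos is set,
 * with the ECN bits handled separately.
 */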
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

                tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
                               tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

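/* Handles setsockopt(TCP_MD5SIG) / setsockopt(TCP_MD5SIG_EXT). A sketch of
 * the userspace side, with hypothetical values:
 *
 *      struct tcp_md5sig md5 = {};
 *
 *      memcpy(&md5.tcpm_addr, &peer_sockaddr, sizeof(peer_sockaddr));
 *      md5.tcpm_keylen = 16;
 *      memcpy(md5.tcpm_key, "example-key-data", 16);
 *      setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * A zero tcpm_keylen deletes the key instead of adding one.
 */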
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;
        u8 flags;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* ok to reference set/not set outside of rcu;
                 * right now device MUST be an L3 master
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index, flags);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index, flags);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index, flags,
                                      cmd.tcpm_key, cmd.tcpm_keylen,
                                      GFP_KERNEL);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index, flags,
                              cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

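/* TCP-MD5 (RFC 2385): the digest covers an IPv6 pseudo-header (addresses,
 * segment length, protocol), the TCP header with its checksum zeroed, the
 * payload, and finally the key itself.
 */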
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for establish/request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);
                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
                                    const struct sk_buff *skb,
                                    int dif, int sdif,
                                    enum skb_drop_reason *reason)
{
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        struct tcp_md5sig_key *hash_expected;
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
        const struct tcphdr *th = tcp_hdr(skb);
        int genhash, l3index;
        u8 newhash[16];

        /* sdif set, means packet ingressed via a device
         * in an L3 domain and dif is set to the l3mdev
         */
        l3index = sdif ? dif : 0;

        hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
        hash_location = tcp_parse_md5sig_option(th);

        /* We've parsed the options - do we have a hash? */
        if (!hash_expected && !hash_location)
                return false;

        if (hash_expected && !hash_location) {
                *reason = SKB_DROP_REASON_TCP_MD5NOTFOUND;
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
                return true;
        }

        if (!hash_expected && hash_location) {
                *reason = SKB_DROP_REASON_TCP_MD5UNEXPECTED;
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
                return true;
        }

        /* check the signature */
        genhash = tcp_v6_md5_hash_skb(newhash,
                                      hash_expected,
                                      NULL, skb);

        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
                *reason = SKB_DROP_REASON_TCP_MD5FAILURE;
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
                net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
                                     genhash ? "failed" : "mismatch",
                                     &ip6h->saddr, ntohs(th->source),
                                     &ip6h->daddr, ntohs(th->dest), l3index);
                return true;
        }
#endif
        return false;
}

static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link locals have meaning */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || np->repflow)) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
                                          struct request_sock *req)
{
        tcp_v6_init_req(req, sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                return NULL;

        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

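/* Build a bare ACK or RST on behalf of skb's sender. The reply skb
 * reserves MAX_TCP_HEADER bytes of headroom (see the commit subject:
 * "consistently use MAX_TCP_HEADER"), so the TCP header, options and
 * lower-layer headers always fit.
 */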
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 mrst = 0, *topt;
        struct dst_entry *dst;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
        if (rst && !key) {
                mrst = mptcp_reset_option(skb);

                if (mrst)
                        tot_len += sizeof(__be32);
        }
#endif

        buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_TCP_HEADER);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

        if (mrst)
                *topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT) {
                        mark = inet_twsk(sk)->tw_mark;
                        /* autoflowlabel relies on buff->hash */
                        skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
                                     PKT_HASH_TYPE_L4);
                } else {
                        mark = sk->sk_mark;
                }
                buff->tstamp = tcp_transmit_time(sk);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass a socket to ip6_dst_lookup_flow even when the reply is a RST;
         * the underlying function uses it to retrieve the network namespace.
         */
        dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        int oif = 0;

        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route had to be correct. prequeue might have dropped
         * our dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* sdif set, means packet ingressed via a device
                 * in an L3 domain and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /*
                 * The active side is lost. Try to find the listening socket
                 * through the source port, and then find the md5 key through
                 * the listening socket. We do not loosen security here:
                 * the incoming packet is checked against the md5 hash of the
                 * found key, and no RST is generated if the hash doesn't
                 * match.
                 */
                sk1 = inet6_lookup_listener(net,
                                           &tcp_hashinfo, NULL, 0,
                                           &ipv6h->saddr,
                                           th->source, &ipv6h->daddr,
                                           ntohs(th->source), dif, sdif);
                if (!sk1)
                        goto out;

                /* sdif set, means packet ingressed via a device
                 * in an L3 domain and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
                        priority = sk->sk_priority;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}

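/* With syncookies, the first ACK (not a SYN) may carry an encoded cookie
 * in its sequence numbers; cookie_v6_check() validates it and, when valid,
 * creates the child socket without any stored request state.
 */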
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
                __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
                return 0;
        }

        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
        /* We need to move the header back to the beginning if
         * xfrm6_policy_check() and tcp_v6_fill_cb() are going to be called
         * again. ip6_datagram_recv_specific_ctl() also expects IP6CB to be
         * there.
         */
        memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
                sizeof(struct inet6_skb_parm));
}

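/* Create the child socket once the handshake completes. An ETH_P_IP skb
 * on an IPv6 listener means a v4-mapped connection, which is built by the
 * IPv4 code and then patched up with IPv6 state.
 */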
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                         struct request_sock *req,
                                         struct dst_entry *dst,
                                         struct request_sock *req_unhash,
                                         bool *own_req)
{
        struct inet_request_sock *ireq;
        struct ipv6_pinfo *newnp;
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct inet_sock *newinet;
        bool found_dup_sk = false;
        struct tcp_sock *newtp;
        struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        int l3index;
#endif
        struct flowi6 fl6;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
                                             req_unhash, own_req);

                if (!newsk)
                        return NULL;

                inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

                newnp = tcp_inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                newnp->saddr = newsk->sk_v6_rcv_saddr;

                inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(newsk))
                        mptcpv6_handle_mapped(newsk, true);
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                newnp->ipv6_mc_list = NULL;
                newnp->ipv6_ac_list = NULL;
                newnp->ipv6_fl_list = NULL;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = inet_iif(skb);
                newnp->mcast_hops  = ip_hdr(skb)->ttl;
                newnp->rcv_flowinfo = 0;
                if (np->repflow)
                        newnp->flow_label = 0;

                /*
                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
                 * here, tcp_create_openreq_child now does this for us, see the comment in
                 * that function for the gory details. -acme
                 */

                /* This is a tricky place. Until this moment IPv4 tcp
                   worked with IPv6 icsk.icsk_af_ops.
                   Sync it now.
                 */
                tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

                return newsk;
        }

        ireq = inet_rsk(req);

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        if (!dst) {
                dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
                if (!dst)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk)
                goto out_nonewsk;

        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
         * count here, tcp_create_openreq_child now does this for us, see the
         * comment in that function for the gory details. -acme
         */

        newsk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(newsk, dst, NULL, NULL);
        inet6_sk_rx_dst_set(newsk, skb);

        inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = tcp_inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
        newnp->saddr = ireq->ir_v6_loc_addr;
        newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
        newsk->sk_bound_dev_if = ireq->ir_iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newinet->inet_opt = NULL;
        newnp->ipv6_mc_list = NULL;
        newnp->ipv6_ac_list = NULL;
        newnp->ipv6_fl_list = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        newnp->pktoptions = NULL;
        newnp->opt        = NULL;
        newnp->mcast_oif  = tcp_v6_iif(skb);
        newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
        newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
        if (np->repflow)
                newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

        /* Set ToS of the new socket based upon the value of incoming SYN.
         * ECT bits are set later in tcp_init_transfer().
         */
        if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
                newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

        /* Clone native IPv6 options from the listening socket (if any).

           Yes, keeping a reference count would be much more clever, but we
           do one more thing here: reattach optmem to newsk.
         */
        opt = ireq->ipv6_opt;
        if (!opt)
                opt = rcu_dereference(np->opt);
        if (opt) {
                opt = ipv6_dup_options(newsk, opt);
                RCU_INIT_POINTER(newnp->opt, opt);
        }
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (opt)
                inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
                                                    opt->opt_flen;

1393         tcp_ca_openreq_child(newsk, dst);
1394
1395         tcp_sync_mss(newsk, dst_mtu(dst));
1396         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1397
1398         tcp_initialize_rcv_mss(newsk);
1399
1400         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1401         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1402
1403 #ifdef CONFIG_TCP_MD5SIG
1404         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1405
1406         /* Copy over the MD5 key from the original socket */
1407         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1408         if (key) {
1409                 /* We're using one, so create a matching key
1410                  * on the newsk structure. If we fail to get
1411                  * memory, then we end up not copying the key
1412                  * across. Shucks.
1413                  */
1414                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1415                                AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1416                                sk_gfp_mask(sk, GFP_ATOMIC));
1417         }
1418 #endif
1419
1420         if (__inet_inherit_port(sk, newsk) < 0) {
1421                 inet_csk_prepare_forced_close(newsk);
1422                 tcp_done(newsk);
1423                 goto out;
1424         }
1425         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1426                                        &found_dup_sk);
1427         if (*own_req) {
1428                 tcp_move_syn(newtp, req);
1429
1430                 /* Clone pktoptions received with SYN, if we own the req */
1431                 if (ireq->pktopts) {
1432                         newnp->pktoptions = skb_clone(ireq->pktopts,
1433                                                       sk_gfp_mask(sk, GFP_ATOMIC));
1434                         consume_skb(ireq->pktopts);
1435                         ireq->pktopts = NULL;
1436                         if (newnp->pktoptions) {
1437                                 tcp_v6_restore_cb(newnp->pktoptions);
1438                                 skb_set_owner_r(newnp->pktoptions, newsk);
1439                         }
1440                 }
1441         } else {
1442                 if (!req_unhash && found_dup_sk) {
1443                         /* This code path should only be executed in the
1444                          * syncookie case only
1445                          */
1446                         bh_unlock_sock(newsk);
1447                         sock_put(newsk);
1448                         newsk = NULL;
1449                 }
1450         }
1451
1452         return newsk;
1453
1454 out_overflow:
1455         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1456 out_nonewsk:
1457         dst_release(dst);
1458 out:
1459         tcp_listendrop(sk);
1460         return NULL;
1461 }
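/*
 * Illustrative userspace sketch, not kernel code: with the
 * tcp_reflect_tos sysctl enabled, the tclass latched from the SYN in
 * tcp_v6_syn_recv_sock() above can be read back on the accepted socket
 * via IPV6_TCLASS. Error handling is minimal; this is an assumption
 * about observable behaviour, not part of this file.
 */
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

static void show_reflected_tclass(int listen_fd)
{
	int conn_fd = accept(listen_fd, NULL, NULL);
	int tclass = 0;
	socklen_t len = sizeof(tclass);

	if (conn_fd < 0)
		return;
	/* reports the traffic class the child socket will send with */
	if (!getsockopt(conn_fd, IPPROTO_IPV6, IPV6_TCLASS, &tclass, &len))
		printf("accepted socket tclass: 0x%02x\n", tclass);
}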
1462
1463 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1464                                                            u32));
1465 /* The socket must have its spinlock held when we get
1466  * here, unless it is a TCP_LISTEN socket.
1467  *
1468  * We have a potential double-lock case here, so even when
1469  * doing backlog processing we use the BH locking scheme.
1470  * This is because we cannot sleep with the original spinlock
1471  * held.
1472  */
1473 INDIRECT_CALLABLE_SCOPE
1474 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1475 {
1476         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1477         struct sk_buff *opt_skb = NULL;
1478         enum skb_drop_reason reason;
1479         struct tcp_sock *tp;
1480
1481         /* Imagine: the socket is IPv6 and an IPv4 packet arrives,
1482            goes to the IPv4 receive handler and is backlogged.
1483            From the backlog it always comes here. Kerboom...
1484            Fortunately, tcp_rcv_established and rcv_established
1485            handle them correctly, but that is not the case with
1486            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1487          */
1488
1489         if (skb->protocol == htons(ETH_P_IP))
1490                 return tcp_v4_do_rcv(sk, skb);
1491
1492         /*
1493          *      socket locking is here for SMP purposes as backlog rcv
1494          *      is currently called with bh processing disabled.
1495          */
1496
1497         /* Do Stevens' IPV6_PKTOPTIONS.
1498
1499            Yes, this is the only place in our code where we can
1500            handle it without affecting IPv4.
1501            The rest of the code is protocol independent,
1502            and I do not like the idea of uglifying IPv4.
1503
1504            Actually, the whole idea behind IPV6_PKTOPTIONS does not
1505            look very well thought out. For now we latch the options
1506            received in the last packet enqueued by TCP; see the
1507            retrieval sketch after this function. Feel free to
1508            propose a better solution.      --ANK (980728)
1509          */
1510         if (np->rxopt.all)
1511                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1512
1513         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1514         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1515                 struct dst_entry *dst;
1516
1517                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1518                                                 lockdep_sock_is_held(sk));
1519
1520                 sock_rps_save_rxhash(sk, skb);
1521                 sk_mark_napi_id(sk, skb);
1522                 if (dst) {
1523                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1524                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1525                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1526                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1527                                 dst_release(dst);
1528                         }
1529                 }
1530
1531                 tcp_rcv_established(sk, skb);
1532                 if (opt_skb)
1533                         goto ipv6_pktoptions;
1534                 return 0;
1535         }
1536
1537         if (tcp_checksum_complete(skb))
1538                 goto csum_err;
1539
1540         if (sk->sk_state == TCP_LISTEN) {
1541                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1542
1543                 if (!nsk)
1544                         goto discard;
1545
1546                 if (nsk != sk) {
1547                         if (tcp_child_process(sk, nsk, skb))
1548                                 goto reset;
1549                         if (opt_skb)
1550                                 __kfree_skb(opt_skb);
1551                         return 0;
1552                 }
1553         } else
1554                 sock_rps_save_rxhash(sk, skb);
1555
1556         if (tcp_rcv_state_process(sk, skb))
1557                 goto reset;
1558         if (opt_skb)
1559                 goto ipv6_pktoptions;
1560         return 0;
1561
1562 reset:
1563         tcp_v6_send_reset(sk, skb);
1564 discard:
1565         if (opt_skb)
1566                 __kfree_skb(opt_skb);
1567         kfree_skb_reason(skb, reason);
1568         return 0;
1569 csum_err:
1570         reason = SKB_DROP_REASON_TCP_CSUM;
1571         trace_tcp_bad_csum(skb);
1572         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1573         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1574         goto discard;
1575
1576
1577 ipv6_pktoptions:
1578         /* You may ask, what is this for? We get here only when:
1579
1580            1. skb was enqueued by tcp.
1581            2. skb was added to the tail of the read queue, not out of order.
1582            3. the socket is not in a passive state.
1583            4. finally, it really contains options the user wants to receive.
1584          */
1585         tp = tcp_sk(sk);
1586         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1587             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1588                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1589                         np->mcast_oif = tcp_v6_iif(opt_skb);
1590                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1591                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1592                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1593                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1594                 if (np->repflow)
1595                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1596                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1597                         skb_set_owner_r(opt_skb, sk);
1598                         tcp_v6_restore_cb(opt_skb);
1599                         opt_skb = xchg(&np->pktoptions, opt_skb);
1600                 } else {
1601                         __kfree_skb(opt_skb);
1602                         opt_skb = xchg(&np->pktoptions, NULL);
1603                 }
1604         }
1605
1606         consume_skb(opt_skb);
1607         return 0;
1608 }
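/*
 * Illustrative userspace sketch, not kernel code: the options latched
 * into np->pktoptions above are retrieved with the RFC 2292-style
 * getsockopt(IPV6_2292PKTOPTIONS), which hands them back as a control
 * message buffer. Assumes glibc headers exposing IPV6_2292PKTOPTIONS
 * and struct in6_pktinfo.
 */
#define _GNU_SOURCE
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

static void fetch_latched_pktoptions(int tcp_fd)
{
	int on = 1;
	char cbuf[256];
	socklen_t clen = sizeof(cbuf);
	struct msghdr msg = { 0 };
	struct cmsghdr *cmsg;

	/* ask the kernel to latch packet info (np->rxopt.bits.rxinfo) */
	setsockopt(tcp_fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));

	if (getsockopt(tcp_fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, cbuf, &clen))
		return;

	msg.msg_control = cbuf;
	msg.msg_controllen = clen;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == IPPROTO_IPV6 &&
		    cmsg->cmsg_type == IPV6_PKTINFO) {
			struct in6_pktinfo info;

			memcpy(&info, CMSG_DATA(cmsg), sizeof(info));
			printf("last segment arrived on ifindex %u\n",
			       info.ipi6_ifindex);
		}
	}
}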
1609
1610 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1611                            const struct tcphdr *th)
1612 {
1613         /* This is tricky: we move IP6CB to its correct location inside
1614          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1615          * _decode_session6() uses IP6CB().
1616          * barrier() makes sure the compiler won't play aliasing games.
1617          */
1618         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1619                 sizeof(struct inet6_skb_parm));
1620         barrier();
1621
1622         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1623         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1624                                     skb->len - th->doff*4);
1625         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1626         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1627         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1628         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1629         TCP_SKB_CB(skb)->sacked = 0;
1630         TCP_SKB_CB(skb)->has_rxtstamp =
1631                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1632 }
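/*
 * Worked example, illustrative only: SYN and FIN each consume one unit
 * of sequence space, so end_seq above covers the flags plus the payload
 * (skb->len - th->doff*4). This helper is not part of the kernel; it
 * just mirrors the arithmetic.
 */
static inline unsigned int tcp_end_seq_example(unsigned int seq, int syn,
					       int fin, unsigned int payload)
{
	/* e.g. seq = 1000, FIN set, 20 payload bytes => end_seq = 1021 */
	return seq + syn + fin + payload;
}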
1633
1634 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1635 {
1636         enum skb_drop_reason drop_reason;
1637         int sdif = inet6_sdif(skb);
1638         int dif = inet6_iif(skb);
1639         const struct tcphdr *th;
1640         const struct ipv6hdr *hdr;
1641         bool refcounted;
1642         struct sock *sk;
1643         int ret;
1644         struct net *net = dev_net(skb->dev);
1645
1646         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1647         if (skb->pkt_type != PACKET_HOST)
1648                 goto discard_it;
1649
1650         /*
1651          *      Count it even if it's bad.
1652          */
1653         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1654
1655         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1656                 goto discard_it;
1657
1658         th = (const struct tcphdr *)skb->data;
1659
1660         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1661                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1662                 goto bad_packet;
1663         }
1664         if (!pskb_may_pull(skb, th->doff*4))
1665                 goto discard_it;
1666
1667         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1668                 goto csum_error;
1669
1670         th = (const struct tcphdr *)skb->data;
1671         hdr = ipv6_hdr(skb);
1672
1673 lookup:
1674         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1675                                 th->source, th->dest, inet6_iif(skb), sdif,
1676                                 &refcounted);
1677         if (!sk)
1678                 goto no_tcp_socket;
1679
1680 process:
1681         if (sk->sk_state == TCP_TIME_WAIT)
1682                 goto do_time_wait;
1683
1684         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1685                 struct request_sock *req = inet_reqsk(sk);
1686                 bool req_stolen = false;
1687                 struct sock *nsk;
1688
1689                 sk = req->rsk_listener;
1690                 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif,
1691                                             &drop_reason)) {
1692                         sk_drops_add(sk, skb);
1693                         reqsk_put(req);
1694                         goto discard_it;
1695                 }
1696                 if (tcp_checksum_complete(skb)) {
1697                         reqsk_put(req);
1698                         goto csum_error;
1699                 }
1700                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1701                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1702                         if (!nsk) {
1703                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1704                                 goto lookup;
1705                         }
1706                         sk = nsk;
1707                         /* reuseport_migrate_sock() already holds one
1708                          * sk_refcnt on return.
1709                          */
1710                 } else {
1711                         sock_hold(sk);
1712                 }
1713                 refcounted = true;
1714                 nsk = NULL;
1715                 if (!tcp_filter(sk, skb)) {
1716                         th = (const struct tcphdr *)skb->data;
1717                         hdr = ipv6_hdr(skb);
1718                         tcp_v6_fill_cb(skb, hdr, th);
1719                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1720                 } else {
1721                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1722                 }
1723                 if (!nsk) {
1724                         reqsk_put(req);
1725                         if (req_stolen) {
1726                                 /* Another CPU got exclusive access to req
1727                                  * and created a full-blown socket.
1728                                  * Try to feed this packet to that socket
1729                                  * instead of discarding it.
1730                                  */
1731                                 tcp_v6_restore_cb(skb);
1732                                 sock_put(sk);
1733                                 goto lookup;
1734                         }
1735                         goto discard_and_relse;
1736                 }
1737                 if (nsk == sk) {
1738                         reqsk_put(req);
1739                         tcp_v6_restore_cb(skb);
1740                 } else if (tcp_child_process(sk, nsk, skb)) {
1741                         tcp_v6_send_reset(nsk, skb);
1742                         goto discard_and_relse;
1743                 } else {
1744                         sock_put(sk);
1745                         return 0;
1746                 }
1747         }
1748
1749         if (static_branch_unlikely(&ip6_min_hopcount)) {
1750                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1751                 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1752                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1753                         goto discard_and_relse;
1754                 }
1755         }
1756
1757         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1758                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1759                 goto discard_and_relse;
1760         }
1761
1762         if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif, &drop_reason))
1763                 goto discard_and_relse;
1764
1765         if (tcp_filter(sk, skb)) {
1766                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1767                 goto discard_and_relse;
1768         }
1769         th = (const struct tcphdr *)skb->data;
1770         hdr = ipv6_hdr(skb);
1771         tcp_v6_fill_cb(skb, hdr, th);
1772
1773         skb->dev = NULL;
1774
1775         if (sk->sk_state == TCP_LISTEN) {
1776                 ret = tcp_v6_do_rcv(sk, skb);
1777                 goto put_and_return;
1778         }
1779
1780         sk_incoming_cpu_update(sk);
1781
1782         sk_defer_free_flush(sk);
1783         bh_lock_sock_nested(sk);
1784         tcp_segs_in(tcp_sk(sk), skb);
1785         ret = 0;
1786         if (!sock_owned_by_user(sk)) {
1787                 ret = tcp_v6_do_rcv(sk, skb);
1788         } else {
1789                 if (tcp_add_backlog(sk, skb, &drop_reason))
1790                         goto discard_and_relse;
1791         }
1792         bh_unlock_sock(sk);
1793 put_and_return:
1794         if (refcounted)
1795                 sock_put(sk);
1796         return ret ? -1 : 0;
1797
1798 no_tcp_socket:
1799         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1800         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1801                 goto discard_it;
1802
1803         tcp_v6_fill_cb(skb, hdr, th);
1804
1805         if (tcp_checksum_complete(skb)) {
1806 csum_error:
1807                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1808                 trace_tcp_bad_csum(skb);
1809                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1810 bad_packet:
1811                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1812         } else {
1813                 tcp_v6_send_reset(NULL, skb);
1814         }
1815
1816 discard_it:
1817         kfree_skb_reason(skb, drop_reason);
1818         return 0;
1819
1820 discard_and_relse:
1821         sk_drops_add(sk, skb);
1822         if (refcounted)
1823                 sock_put(sk);
1824         goto discard_it;
1825
1826 do_time_wait:
1827         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1828                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1829                 inet_twsk_put(inet_twsk(sk));
1830                 goto discard_it;
1831         }
1832
1833         tcp_v6_fill_cb(skb, hdr, th);
1834
1835         if (tcp_checksum_complete(skb)) {
1836                 inet_twsk_put(inet_twsk(sk));
1837                 goto csum_error;
1838         }
1839
1840         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1841         case TCP_TW_SYN:
1842         {
1843                 struct sock *sk2;
1844
1845                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1846                                             skb, __tcp_hdrlen(th),
1847                                             &ipv6_hdr(skb)->saddr, th->source,
1848                                             &ipv6_hdr(skb)->daddr,
1849                                             ntohs(th->dest),
1850                                             tcp_v6_iif_l3_slave(skb),
1851                                             sdif);
1852                 if (sk2) {
1853                         struct inet_timewait_sock *tw = inet_twsk(sk);
1854                         inet_twsk_deschedule_put(tw);
1855                         sk = sk2;
1856                         tcp_v6_restore_cb(skb);
1857                         refcounted = false;
1858                         goto process;
1859                 }
1860         }
1861                 /* to ACK */
1862                 fallthrough;
1863         case TCP_TW_ACK:
1864                 tcp_v6_timewait_ack(sk, skb);
1865                 break;
1866         case TCP_TW_RST:
1867                 tcp_v6_send_reset(sk, skb);
1868                 inet_twsk_deschedule_put(inet_twsk(sk));
1869                 goto discard_it;
1870         case TCP_TW_SUCCESS:
1871                 ;
1872         }
1873         goto discard_it;
1874 }
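/*
 * Illustrative sketch, not kernel code: the checksum seeded by
 * skb_checksum_init(..., ip6_compute_pseudo) in tcp_v6_rcv() above
 * covers the RFC 8200 pseudo-header: source and destination addresses,
 * the upper-layer packet length, and the next-header value. A portable
 * rendition of that partial sum:
 */
#include <netinet/in.h>
#include <stdint.h>

static uint32_t ipv6_pseudo_sum(const struct in6_addr *src,
				const struct in6_addr *dst, uint32_t tcp_len)
{
	uint32_t sum = 0;
	int i;

	for (i = 0; i < 16; i += 2) {
		sum += (src->s6_addr[i] << 8) | src->s6_addr[i + 1];
		sum += (dst->s6_addr[i] << 8) | dst->s6_addr[i + 1];
	}
	sum += tcp_len >> 16;		/* upper-layer packet length */
	sum += tcp_len & 0xffff;
	sum += IPPROTO_TCP;		/* next header */
	/* the caller folds the carries and adds the TCP header + payload */
	return sum;
}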
1875
1876 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1877 {
1878         const struct ipv6hdr *hdr;
1879         const struct tcphdr *th;
1880         struct sock *sk;
1881
1882         if (skb->pkt_type != PACKET_HOST)
1883                 return;
1884
1885         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1886                 return;
1887
1888         hdr = ipv6_hdr(skb);
1889         th = tcp_hdr(skb);
1890
1891         if (th->doff < sizeof(struct tcphdr) / 4)
1892                 return;
1893
1894         /* Note: we use inet6_iif() here, not tcp_v6_iif() */
1895         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1896                                         &hdr->saddr, th->source,
1897                                         &hdr->daddr, ntohs(th->dest),
1898                                         inet6_iif(skb), inet6_sdif(skb));
1899         if (sk) {
1900                 skb->sk = sk;
1901                 skb->destructor = sock_edemux;
1902                 if (sk_fullsock(sk)) {
1903                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1904
1905                         if (dst)
1906                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1907                         if (dst &&
1908                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1909                                 skb_dst_set_noref(skb, dst);
1910                 }
1911         }
1912 }
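/*
 * Illustrative userspace sketch, not kernel code: early demux is
 * runtime-switchable, which is why tcpv6_protocol below is not const.
 * On kernels of this vintage the knob is net.ipv4.tcp_early_demux
 * (assumption: procfs mounted at /proc).
 */
#include <stdio.h>

static int tcp_early_demux_enabled(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_early_demux", "r");
	int val = -1;

	if (f) {
		if (fscanf(f, "%d", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;	/* 1 = enabled, 0 = disabled, -1 = unknown */
}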
1913
1914 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1915         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1916         .twsk_unique    = tcp_twsk_unique,
1917         .twsk_destructor = tcp_twsk_destructor,
1918 };
1919
1920 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1921 {
1922         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1923 }
1924
1925 const struct inet_connection_sock_af_ops ipv6_specific = {
1926         .queue_xmit        = inet6_csk_xmit,
1927         .send_check        = tcp_v6_send_check,
1928         .rebuild_header    = inet6_sk_rebuild_header,
1929         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1930         .conn_request      = tcp_v6_conn_request,
1931         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1932         .net_header_len    = sizeof(struct ipv6hdr),
1933         .net_frag_header_len = sizeof(struct frag_hdr),
1934         .setsockopt        = ipv6_setsockopt,
1935         .getsockopt        = ipv6_getsockopt,
1936         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1937         .sockaddr_len      = sizeof(struct sockaddr_in6),
1938         .mtu_reduced       = tcp_v6_mtu_reduced,
1939 };
1940
1941 #ifdef CONFIG_TCP_MD5SIG
1942 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1943         .md5_lookup     =       tcp_v6_md5_lookup,
1944         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1945         .md5_parse      =       tcp_v6_parse_md5_keys,
1946 };
1947 #endif
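/*
 * Illustrative userspace sketch, not kernel code: the MD5 ops above
 * back the TCP_MD5SIG socket option (RFC 2385), and
 * tcp_v6_parse_md5_keys() is the kernel-side parser for this call on
 * AF_INET6 sockets. Assumes Linux uapi headers providing
 * struct tcp_md5sig.
 */
#include <linux/tcp.h>		/* struct tcp_md5sig, TCP_MD5SIG */
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

static int install_md5_key(int fd, const struct sockaddr_in6 *peer,
			   const void *key, unsigned int keylen)
{
	struct tcp_md5sig md5;

	if (keylen > TCP_MD5SIG_MAXKEYLEN)	/* 80 bytes max */
		return -1;
	memset(&md5, 0, sizeof(md5));
	memcpy(&md5.tcpm_addr, peer, sizeof(*peer));
	md5.tcpm_keylen = keylen;
	memcpy(md5.tcpm_key, key, keylen);
	return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
}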
1948
1949 /*
1950  *      TCP over IPv4 via INET6 API
1951  */
1952 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1953         .queue_xmit        = ip_queue_xmit,
1954         .send_check        = tcp_v4_send_check,
1955         .rebuild_header    = inet_sk_rebuild_header,
1956         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1957         .conn_request      = tcp_v6_conn_request,
1958         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1959         .net_header_len    = sizeof(struct iphdr),
1960         .setsockopt        = ipv6_setsockopt,
1961         .getsockopt        = ipv6_getsockopt,
1962         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1963         .sockaddr_len      = sizeof(struct sockaddr_in6),
1964         .mtu_reduced       = tcp_v4_mtu_reduced,
1965 };
1966
1967 #ifdef CONFIG_TCP_MD5SIG
1968 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1969         .md5_lookup     =       tcp_v4_md5_lookup,
1970         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1971         .md5_parse      =       tcp_v6_parse_md5_keys,
1972 };
1973 #endif
1974
1975 /* NOTE: A lot of things are set to zero explicitly by the call to
1976  *       sk_alloc(), so they need not be done here.
1977  */
1978 static int tcp_v6_init_sock(struct sock *sk)
1979 {
1980         struct inet_connection_sock *icsk = inet_csk(sk);
1981
1982         tcp_init_sock(sk);
1983
1984         icsk->icsk_af_ops = &ipv6_specific;
1985
1986 #ifdef CONFIG_TCP_MD5SIG
1987         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1988 #endif
1989
1990         return 0;
1991 }
1992
1993 static void tcp_v6_destroy_sock(struct sock *sk)
1994 {
1995         tcp_v4_destroy_sock(sk);
1996         inet6_destroy_sock(sk);
1997 }
1998
1999 #ifdef CONFIG_PROC_FS
2000 /* Proc filesystem TCPv6 sock list dumping. */
2001 static void get_openreq6(struct seq_file *seq,
2002                          const struct request_sock *req, int i)
2003 {
2004         long ttd = req->rsk_timer.expires - jiffies;
2005         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
2006         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
2007
2008         if (ttd < 0)
2009                 ttd = 0;
2010
2011         seq_printf(seq,
2012                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2013                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
2014                    i,
2015                    src->s6_addr32[0], src->s6_addr32[1],
2016                    src->s6_addr32[2], src->s6_addr32[3],
2017                    inet_rsk(req)->ir_num,
2018                    dest->s6_addr32[0], dest->s6_addr32[1],
2019                    dest->s6_addr32[2], dest->s6_addr32[3],
2020                    ntohs(inet_rsk(req)->ir_rmt_port),
2021                    TCP_SYN_RECV,
2022                    0, 0, /* could print option size, but that is af dependent. */
2023                    1,   /* timers active (only the expire timer) */
2024                    jiffies_to_clock_t(ttd),
2025                    req->num_timeout,
2026                    from_kuid_munged(seq_user_ns(seq),
2027                                     sock_i_uid(req->rsk_listener)),
2028                    0,  /* non standard timer */
2029                    0, /* open_requests have no inode */
2030                    0, req);
2031 }
2032
2033 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2034 {
2035         const struct in6_addr *dest, *src;
2036         __u16 destp, srcp;
2037         int timer_active;
2038         unsigned long timer_expires;
2039         const struct inet_sock *inet = inet_sk(sp);
2040         const struct tcp_sock *tp = tcp_sk(sp);
2041         const struct inet_connection_sock *icsk = inet_csk(sp);
2042         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2043         int rx_queue;
2044         int state;
2045
2046         dest  = &sp->sk_v6_daddr;
2047         src   = &sp->sk_v6_rcv_saddr;
2048         destp = ntohs(inet->inet_dport);
2049         srcp  = ntohs(inet->inet_sport);
2050
2051         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2052             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2053             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2054                 timer_active    = 1;
2055                 timer_expires   = icsk->icsk_timeout;
2056         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2057                 timer_active    = 4;
2058                 timer_expires   = icsk->icsk_timeout;
2059         } else if (timer_pending(&sp->sk_timer)) {
2060                 timer_active    = 2;
2061                 timer_expires   = sp->sk_timer.expires;
2062         } else {
2063                 timer_active    = 0;
2064                 timer_expires = jiffies;
2065         }
2066
2067         state = inet_sk_state_load(sp);
2068         if (state == TCP_LISTEN)
2069                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2070         else
2071                 /* Because we don't lock the socket,
2072                  * we might find a transient negative value.
2073                  */
2074                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2075                                       READ_ONCE(tp->copied_seq), 0);
2076
2077         seq_printf(seq,
2078                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2079                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2080                    i,
2081                    src->s6_addr32[0], src->s6_addr32[1],
2082                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2083                    dest->s6_addr32[0], dest->s6_addr32[1],
2084                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2085                    state,
2086                    READ_ONCE(tp->write_seq) - tp->snd_una,
2087                    rx_queue,
2088                    timer_active,
2089                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2090                    icsk->icsk_retransmits,
2091                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2092                    icsk->icsk_probes_out,
2093                    sock_i_ino(sp),
2094                    refcount_read(&sp->sk_refcnt), sp,
2095                    jiffies_to_clock_t(icsk->icsk_rto),
2096                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2097                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2098                    tp->snd_cwnd,
2099                    state == TCP_LISTEN ?
2100                         fastopenq->max_qlen :
2101                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2102                    );
2103 }
2104
2105 static void get_timewait6_sock(struct seq_file *seq,
2106                                struct inet_timewait_sock *tw, int i)
2107 {
2108         long delta = tw->tw_timer.expires - jiffies;
2109         const struct in6_addr *dest, *src;
2110         __u16 destp, srcp;
2111
2112         dest = &tw->tw_v6_daddr;
2113         src  = &tw->tw_v6_rcv_saddr;
2114         destp = ntohs(tw->tw_dport);
2115         srcp  = ntohs(tw->tw_sport);
2116
2117         seq_printf(seq,
2118                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2119                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2120                    i,
2121                    src->s6_addr32[0], src->s6_addr32[1],
2122                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2123                    dest->s6_addr32[0], dest->s6_addr32[1],
2124                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2125                    tw->tw_substate, 0, 0,
2126                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2127                    refcount_read(&tw->tw_refcnt), tw);
2128 }
2129
2130 static int tcp6_seq_show(struct seq_file *seq, void *v)
2131 {
2132         struct tcp_iter_state *st;
2133         struct sock *sk = v;
2134
2135         if (v == SEQ_START_TOKEN) {
2136                 seq_puts(seq,
2137                          "  sl  "
2138                          "local_address                         "
2139                          "remote_address                        "
2140                          "st tx_queue rx_queue tr tm->when retrnsmt"
2141                          "   uid  timeout inode\n");
2142                 goto out;
2143         }
2144         st = seq->private;
2145
2146         if (sk->sk_state == TCP_TIME_WAIT)
2147                 get_timewait6_sock(seq, v, st->num);
2148         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2149                 get_openreq6(seq, v, st->num);
2150         else
2151                 get_tcp6_sock(seq, v, st->num);
2152 out:
2153         return 0;
2154 }
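/*
 * Illustrative userspace sketch, not kernel code: consuming the lines
 * emitted by the seq handlers above from /proc/net/tcp6. Addresses are
 * printed as 32 hex digits, ports as 4 hex digits, both joined by ':'.
 */
#include <stdio.h>

static void dump_tcp6_states(void)
{
	FILE *f = fopen("/proc/net/tcp6", "r");
	char line[512];

	if (!f)
		return;
	if (!fgets(line, sizeof(line), f)) {	/* skip the header line */
		fclose(f);
		return;
	}
	while (fgets(line, sizeof(line), f)) {
		unsigned int sl, lport, rport, state;
		char laddr[33], raddr[33];

		if (sscanf(line, "%u: %32[0-9A-Fa-f]:%x %32[0-9A-Fa-f]:%x %x",
			   &sl, laddr, &lport, raddr, &rport, &state) == 6)
			printf("sl=%u st=%02x local=[%s]:%u\n",
			       sl, state, laddr, lport);
	}
	fclose(f);
}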
2155
2156 static const struct seq_operations tcp6_seq_ops = {
2157         .show           = tcp6_seq_show,
2158         .start          = tcp_seq_start,
2159         .next           = tcp_seq_next,
2160         .stop           = tcp_seq_stop,
2161 };
2162
2163 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2164         .family         = AF_INET6,
2165 };
2166
2167 int __net_init tcp6_proc_init(struct net *net)
2168 {
2169         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2170                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2171                 return -ENOMEM;
2172         return 0;
2173 }
2174
2175 void tcp6_proc_exit(struct net *net)
2176 {
2177         remove_proc_entry("tcp6", net->proc_net);
2178 }
2179 #endif
2180
2181 struct proto tcpv6_prot = {
2182         .name                   = "TCPv6",
2183         .owner                  = THIS_MODULE,
2184         .close                  = tcp_close,
2185         .pre_connect            = tcp_v6_pre_connect,
2186         .connect                = tcp_v6_connect,
2187         .disconnect             = tcp_disconnect,
2188         .accept                 = inet_csk_accept,
2189         .ioctl                  = tcp_ioctl,
2190         .init                   = tcp_v6_init_sock,
2191         .destroy                = tcp_v6_destroy_sock,
2192         .shutdown               = tcp_shutdown,
2193         .setsockopt             = tcp_setsockopt,
2194         .getsockopt             = tcp_getsockopt,
2195         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2196         .keepalive              = tcp_set_keepalive,
2197         .recvmsg                = tcp_recvmsg,
2198         .sendmsg                = tcp_sendmsg,
2199         .sendpage               = tcp_sendpage,
2200         .backlog_rcv            = tcp_v6_do_rcv,
2201         .release_cb             = tcp_release_cb,
2202         .hash                   = inet6_hash,
2203         .unhash                 = inet_unhash,
2204         .get_port               = inet_csk_get_port,
2205         .put_port               = inet_put_port,
2206 #ifdef CONFIG_BPF_SYSCALL
2207         .psock_update_sk_prot   = tcp_bpf_update_proto,
2208 #endif
2209         .enter_memory_pressure  = tcp_enter_memory_pressure,
2210         .leave_memory_pressure  = tcp_leave_memory_pressure,
2211         .stream_memory_free     = tcp_stream_memory_free,
2212         .sockets_allocated      = &tcp_sockets_allocated,
2213         .memory_allocated       = &tcp_memory_allocated,
2214         .memory_pressure        = &tcp_memory_pressure,
2215         .orphan_count           = &tcp_orphan_count,
2216         .sysctl_mem             = sysctl_tcp_mem,
2217         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2218         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2219         .max_header             = MAX_TCP_HEADER,
2220         .obj_size               = sizeof(struct tcp6_sock),
2221         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2222         .twsk_prot              = &tcp6_timewait_sock_ops,
2223         .rsk_prot               = &tcp6_request_sock_ops,
2224         .h.hashinfo             = &tcp_hashinfo,
2225         .no_autobind            = true,
2226         .diag_destroy           = tcp_abort,
2227 };
2228 EXPORT_SYMBOL_GPL(tcpv6_prot);
2229
2230 /* thinking of making this const? Don't.
2231  * early_demux can change based on sysctl.
2232  */
2233 static struct inet6_protocol tcpv6_protocol = {
2234         .early_demux    =       tcp_v6_early_demux,
2235         .early_demux_handler =  tcp_v6_early_demux,
2236         .handler        =       tcp_v6_rcv,
2237         .err_handler    =       tcp_v6_err,
2238         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2239 };
2240
2241 static struct inet_protosw tcpv6_protosw = {
2242         .type           =       SOCK_STREAM,
2243         .protocol       =       IPPROTO_TCP,
2244         .prot           =       &tcpv6_prot,
2245         .ops            =       &inet6_stream_ops,
2246         .flags          =       INET_PROTOSW_PERMANENT |
2247                                 INET_PROTOSW_ICSK,
2248 };
2249
2250 static int __net_init tcpv6_net_init(struct net *net)
2251 {
2252         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2253                                     SOCK_RAW, IPPROTO_TCP, net);
2254 }
2255
2256 static void __net_exit tcpv6_net_exit(struct net *net)
2257 {
2258         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2259 }
2260
2261 static struct pernet_operations tcpv6_net_ops = {
2262         .init       = tcpv6_net_init,
2263         .exit       = tcpv6_net_exit,
2264 };
2265
2266 int __init tcpv6_init(void)
2267 {
2268         int ret;
2269
2270         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2271         if (ret)
2272                 goto out;
2273
2274         /* register inet6 protocol */
2275         ret = inet6_register_protosw(&tcpv6_protosw);
2276         if (ret)
2277                 goto out_tcpv6_protocol;
2278
2279         ret = register_pernet_subsys(&tcpv6_net_ops);
2280         if (ret)
2281                 goto out_tcpv6_protosw;
2282
2283         ret = mptcpv6_init();
2284         if (ret)
2285                 goto out_tcpv6_pernet_subsys;
2286
2287 out:
2288         return ret;
2289
2290 out_tcpv6_pernet_subsys:
2291         unregister_pernet_subsys(&tcpv6_net_ops);
2292 out_tcpv6_protosw:
2293         inet6_unregister_protosw(&tcpv6_protosw);
2294 out_tcpv6_protocol:
2295         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2296         goto out;
2297 }
2298
2299 void tcpv6_exit(void)
2300 {
2301         unregister_pernet_subsys(&tcpv6_net_ops);
2302         inet6_unregister_protosw(&tcpv6_protosw);
2303         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2304 }