2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
42 #include <linux/bpf-cgroup.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
58 #include <linux/mroute6.h>
59 #include <net/l3mdev.h>
60 #include <net/lwtunnel.h>
62 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
/* Final IPv6 transmit step: loop multicast back to local listeners when
 * required, honour a lightweight-tunnel xmit redirect, then resolve the
 * L2 neighbour for the route's nexthop and hand the skb to it.
 * NOTE(review): this excerpt elides several lines of the original
 * function (braces, kfree_skb() drop paths, rcu_read_lock_bh()); the
 * comments below describe only what is visible here.
 */
64 struct dst_entry *dst = skb_dst(skb);
65 struct net_device *dev = dst->dev;
66 struct neighbour *neigh;
67 struct in6_addr *nexthop;
/* Multicast destination: possibly deliver a clone to local receivers. */
70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
/* Loop back when mc loopback is enabled on the socket and either a
 * multicast-router socket wants the packet (and it was not already
 * forwarded) or a local listener has joined the destination group.
 */
73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
74 ((mroute6_is_socket(net, skb) &&
75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
/* The clone re-enters POST_ROUTING so the loopback copy is also
 * subject to netfilter.
 */
84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
85 net, sk, newskb, NULL, newskb->dev,
/* A multicast packet with hop_limit 0 must not be sent on the wire. */
88 if (ipv6_hdr(skb)->hop_limit == 0) {
89 IP6_INC_STATS(net, idev,
90 IPSTATS_MIB_OUTDISCARDS);
96 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
/* Interface/node-local scoped multicast must never leave the node. */
98 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
99 IPV6_ADDR_SCOPE_NODELOCAL &&
100 !(dev->flags & IFF_LOOPBACK)) {
/* A lightweight tunnel state attached to the dst may take over the
 * transmit entirely.
 */
106 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
107 int res = lwtunnel_xmit(skb);
109 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
/* Resolve (or create) the neighbour entry for the nexthop and send.
 * NOTE(review): the rcu_read_lock_bh() pairing with the unlocks below
 * sits on an elided line of the original.
 */
114 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
115 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
116 if (unlikely(!neigh))
117 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
118 if (!IS_ERR(neigh)) {
119 sock_confirm_neigh(skb, neigh);
120 ret = neigh_output(neigh, skb);
121 rcu_read_unlock_bh();
124 rcu_read_unlock_bh();
/* Neighbour creation failed: account as "no route" before dropping. */
126 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
131 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
/* Post-netfilter output step: run the cgroup BPF egress program,
 * re-route packets that acquired an XFRM policy after SNAT, fragment
 * oversized packets, and otherwise pass to ip6_finish_output2().
 * NOTE(review): excerpt elides braces/#endif and the handling of 'ret';
 * do not treat this as the complete function body.
 */
135 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
141 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
142 /* Policy lookup after SNAT yielded a new policy */
143 if (skb_dst(skb)->xfrm) {
144 IPCB(skb)->flags |= IPSKB_REROUTED;
145 return dst_output(net, sk, skb);
/* Fragment when the skb exceeds the dst MTU (and is not GSO), when the
 * route requires fragmentation on all packets, or when conntrack defrag
 * recorded a smaller maximum incoming fragment size.
 */
149 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
150 dst_allfrag(skb_dst(skb)) ||
151 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
152 return ip6_fragment(net, sk, skb, ip6_finish_output2);
154 return ip6_finish_output2(net, sk, skb);
157 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
/* dst_output() entry point for IPv6: drop if IPv6 is administratively
 * disabled on the egress device, otherwise run the POST_ROUTING
 * netfilter hook (skipped for packets flagged IP6SKB_REROUTED) on the
 * way to ip6_finish_output().
 */
159 struct net_device *dev = skb_dst(skb)->dev;
160 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
162 skb->protocol = htons(ETH_P_IPV6);
165 if (unlikely(idev->cnf.disable_ipv6)) {
166 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
/* NOTE(review): the kfree_skb()/return of the disabled-IPv6 branch is
 * on elided lines of the original.
 */
171 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
172 net, sk, skb, NULL, dev,
174 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
177 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
179 if (!np->autoflowlabel_set)
180 return ip6_default_np_autolabel(net);
182 return np->autoflowlabel;
186 * xmit an sk_buff (used by TCP, SCTP and DCCP)
187 * Note : socket lock is not held for SYNACK packets, but might be modified
188 * by calls to skb_set_owner_w() and ipv6_local_error(),
189 * which are using proper atomic operations or spinlocks.
191 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
192 __u32 mark, struct ipv6_txoptions *opt, int tclass)
/* Build the IPv6 header (and any extension headers from @opt) in front
 * of the transport payload and hand the packet to the LOCAL_OUT hook.
 * NOTE(review): excerpt elides braces, declarations (hdr, hlimit, mtu)
 * and error/drop paths of the original function.
 */
194 struct net *net = sock_net(sk);
195 const struct ipv6_pinfo *np = inet6_sk(sk);
196 struct in6_addr *first_hop = &fl6->daddr;
197 struct dst_entry *dst = skb_dst(skb);
199 u8 proto = fl6->flowi6_proto;
200 int seg_len = skb->len;
205 unsigned int head_room;
207 /* First: exthdrs may take lots of space (~8K for now)
208 MAX_HEADER is not enough.
210 head_room = opt->opt_nflen + opt->opt_flen;
211 seg_len += head_room;
212 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
/* Reallocate headroom if the caller's skb cannot fit the headers. */
214 if (skb_headroom(skb) < head_room) {
215 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
217 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
218 IPSTATS_MIB_OUTDISCARDS);
224 /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
225 * it is safe to call in our context (socket lock not held)
227 skb_set_owner_w(skb, (struct sock *)sk);
/* Push fragmentable then non-fragmentable extension headers; the
 * latter may rewrite first_hop (e.g. routing header).
 */
230 ipv6_push_frag_opts(skb, opt, &proto);
232 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
236 skb_push(skb, sizeof(struct ipv6hdr));
237 skb_reset_network_header(skb);
241 * Fill in the IPv6 header
/* Hop limit: per-socket value if set, otherwise from the route. */
244 hlimit = np->hop_limit;
246 hlimit = ip6_dst_hoplimit(dst);
248 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
249 ip6_autoflowlabel(net, np), fl6));
251 hdr->payload_len = htons(seg_len);
252 hdr->nexthdr = proto;
253 hdr->hop_limit = hlimit;
255 hdr->saddr = fl6->saddr;
256 hdr->daddr = *first_hop;
258 skb->protocol = htons(ETH_P_IPV6);
259 skb->priority = sk->sk_priority;
/* Packet fits the MTU (or DF is ignored / GSO will segment): send. */
263 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
264 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
265 IPSTATS_MIB_OUT, skb->len);
267 /* if egress device is enslaved to an L3 master device pass the
268 * skb to its handler for processing
270 skb = l3mdev_ip6_out((struct sock *)sk, skb);
274 /* hooks should never assume socket lock is held.
275 * we promote our socket to non const
277 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
278 net, (struct sock *)sk, skb, NULL, dst->dev,
/* Too big and can't fragment here: report EMSGSIZE to the sender. */
283 /* ipv6_local_error() does not require socket lock,
284 * we promote our socket to non const
286 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
288 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
292 EXPORT_SYMBOL(ip6_xmit);
294 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
/* Deliver a Router Alert packet to every raw socket registered on the
 * ip6_ra_chain with a matching selector (and matching bound device, if
 * any).  Each matching socket except the last receives a clone; the
 * last gets the original skb.
 * NOTE(review): excerpt elides the last/return bookkeeping lines of the
 * original loop body.
 */
296 struct ip6_ra_chain *ra;
297 struct sock *last = NULL;
299 read_lock(&ip6_ra_lock);
300 for (ra = ip6_ra_chain; ra; ra = ra->next) {
301 struct sock *sk = ra->sk;
302 if (sk && ra->sel == sel &&
303 (!sk->sk_bound_dev_if ||
304 sk->sk_bound_dev_if == skb->dev->ifindex)) {
306 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
308 rawv6_rcv(last, skb2);
/* Final consumer takes ownership of the original skb. */
315 rawv6_rcv(last, skb);
316 read_unlock(&ip6_ra_lock);
319 read_unlock(&ip6_ra_lock);
323 static int ip6_forward_proxy_check(struct sk_buff *skb)
/* Classify a packet destined to a proxied (proxy-ND) address: decide
 * whether it should be delivered locally, forwarded, or dropped.
 * NOTE(review): excerpt elides the declarations (offset, frag_off) and
 * the return statements of the original.
 */
325 struct ipv6hdr *hdr = ipv6_hdr(skb);
326 u8 nexthdr = hdr->nexthdr;
/* Skip any extension headers to find the transport protocol. */
330 if (ipv6_ext_hdr(nexthdr)) {
331 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
335 offset = sizeof(struct ipv6hdr);
337 if (nexthdr == IPPROTO_ICMPV6) {
338 struct icmp6hdr *icmp6;
/* Make sure at least the ICMPv6 type byte is in linear data. */
340 if (!pskb_may_pull(skb, (skb_network_header(skb) +
341 offset + 1 - skb->data)))
344 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
346 switch (icmp6->icmp6_type) {
347 case NDISC_ROUTER_SOLICITATION:
348 case NDISC_ROUTER_ADVERTISEMENT:
349 case NDISC_NEIGHBOUR_SOLICITATION:
350 case NDISC_NEIGHBOUR_ADVERTISEMENT:
352 /* For reaction involving unicast neighbor discovery
353 * message destined to the proxied address, pass it to
363 * The proxying router can't forward traffic sent to a link-local
364 * address, so signal the sender and discard the packet. This
365 * behavior is clarified by the MIPv6 specification.
367 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
368 dst_link_failure(skb);
375 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
/* Forwarding tail call run after the NF_INET_FORWARD hook: bump the
 * forwarding counters and re-enter the output path via dst_output().
 * NOTE(review): the skb parameter line is elided in this excerpt.
 */
378 struct dst_entry *dst = skb_dst(skb);
380 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
381 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
383 return dst_output(net, sk, skb);
386 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
/* True when @skb cannot be forwarded over a path with the given @mtu.
 * NOTE(review): the early "skb->len <= mtu" check and the final return
 * statements are on elided lines of the original.
 */
391 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
392 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
/* GSO packets pass if each resulting segment fits the network MTU. */
398 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
404 int ip6_forward(struct sk_buff *skb)
/* Forward a received IPv6 packet: policy checks, hop-limit handling,
 * proxy-ND diversion, redirect generation, MTU check, then the
 * NF_INET_FORWARD hook into ip6_forward_finish().
 * NOTE(review): excerpt elides goto labels ("drop", "error"), the
 * hop-limit decrement, kfree_skb() calls and several returns.
 */
406 struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
407 struct dst_entry *dst = skb_dst(skb);
408 struct ipv6hdr *hdr = ipv6_hdr(skb);
409 struct inet6_skb_parm *opt = IP6CB(skb);
410 struct net *net = dev_net(dst->dev);
/* Basic eligibility: forwarding enabled, packet addressed to us at L2,
 * not socket-owned, not an un-segmented LRO aggregate.
 */
413 if (net->ipv6.devconf_all->forwarding == 0)
416 if (skb->pkt_type != PACKET_HOST)
419 if (unlikely(skb->sk))
422 if (skb_warn_if_lro(skb))
425 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
426 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
430 skb_forward_csum(skb);
433 * We DO NOT make any processing on
434 * RA packets, pushing them to user level AS IS
435 * without ane WARRANTY that application will be able
436 * to interpret them. The reason is that we
437 * cannot make anything clever here.
439 * We are not end-node, so that if packet contains
440 * AH/ESP, we cannot make anything.
441 * Defragmentation also would be mistake, RA packets
442 * cannot be fragmented, because there is no warranty
443 * that different fragments will go along one path. --ANK
445 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
446 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
451 * check and decrement ttl
453 if (hdr->hop_limit <= 1) {
454 /* Force OUTPUT device used as source address */
456 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
457 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
/* Proxy ND: packets to a proxied address may be consumed locally. */
463 /* XXX: idev->cnf.proxy_ndp? */
464 if (net->ipv6.devconf_all->proxy_ndp &&
465 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
466 int proxied = ip6_forward_proxy_check(skb);
468 return ip6_input(skb);
469 else if (proxied < 0) {
470 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
475 if (!xfrm6_route_forward(skb)) {
476 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
481 /* IPv6 specs say nothing about it, but it is clear that we cannot
482 send redirects to source routed frames.
483 We don't send redirects to frames decapsulated from IPsec.
485 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
486 struct in6_addr *target = NULL;
487 struct inet_peer *peer;
491 * incoming and outgoing devices are the same
495 rt = (struct rt6_info *) dst;
496 if (rt->rt6i_flags & RTF_GATEWAY)
497 target = &rt->rt6i_gateway;
499 target = &hdr->daddr;
501 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
503 /* Limit redirects both by destination (here)
504 and by source (inside ndisc_send_redirect)
506 if (inet_peer_xrlim_allow(peer, 1*HZ))
507 ndisc_send_redirect(skb, target);
511 int addrtype = ipv6_addr_type(&hdr->saddr);
513 /* This check is security critical. */
514 if (addrtype == IPV6_ADDR_ANY ||
515 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
517 if (addrtype & IPV6_ADDR_LINKLOCAL) {
518 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
519 ICMPV6_NOT_NEIGHBOUR, 0);
/* Path MTU check; too-big packets get ICMPV6_PKT_TOOBIG back. */
524 mtu = ip6_dst_mtu_forward(dst);
525 if (mtu < IPV6_MIN_MTU)
528 if (ip6_pkt_too_big(skb, mtu)) {
529 /* Again, force OUTPUT device used as source address */
531 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
532 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
533 __IP6_INC_STATS(net, ip6_dst_idev(dst),
534 IPSTATS_MIB_FRAGFAILS);
/* Private copy of the header before we mangle hop_limit below. */
539 if (skb_cow(skb, dst->dev->hard_header_len)) {
540 __IP6_INC_STATS(net, ip6_dst_idev(dst),
541 IPSTATS_MIB_OUTDISCARDS);
547 /* Mangling hops number delayed to point after skb COW */
551 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
552 net, NULL, skb, skb->dev, dst->dev,
556 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
562 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
/* Copy per-packet metadata from the original skb to a fragment so each
 * fragment is routed/classified identically (dst reference included).
 * NOTE(review): the matching #endif for CONFIG_NET_SCHED is elided.
 */
564 to->pkt_type = from->pkt_type;
565 to->priority = from->priority;
566 to->protocol = from->protocol;
568 skb_dst_set(to, dst_clone(skb_dst(from)));
570 to->mark = from->mark;
572 #ifdef CONFIG_NET_SCHED
573 to->tc_index = from->tc_index;
576 skb_copy_secmark(to, from);
579 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
580 int (*output)(struct net *, struct sock *, struct sk_buff *))
582 struct sk_buff *frag;
583 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
584 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
585 inet6_sk(skb->sk) : NULL;
586 struct ipv6hdr *tmp_hdr;
588 unsigned int mtu, hlen, left, len;
591 int ptr, offset = 0, err = 0;
592 u8 *prevhdr, nexthdr = 0;
594 err = ip6_find_1stfragopt(skb, &prevhdr);
600 mtu = ip6_skb_dst_mtu(skb);
602 /* We must not fragment if the socket is set to force MTU discovery
603 * or if the skb it not generated by a local socket.
605 if (unlikely(!skb->ignore_df && skb->len > mtu))
608 if (IP6CB(skb)->frag_max_size) {
609 if (IP6CB(skb)->frag_max_size > mtu)
612 /* don't send fragments larger than what we received */
613 mtu = IP6CB(skb)->frag_max_size;
614 if (mtu < IPV6_MIN_MTU)
618 if (np && np->frag_size < mtu) {
622 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
624 mtu -= hlen + sizeof(struct frag_hdr);
626 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
627 &ipv6_hdr(skb)->saddr);
629 if (skb->ip_summed == CHECKSUM_PARTIAL &&
630 (err = skb_checksum_help(skb)))
633 hroom = LL_RESERVED_SPACE(rt->dst.dev);
634 if (skb_has_frag_list(skb)) {
635 unsigned int first_len = skb_pagelen(skb);
636 struct sk_buff *frag2;
638 if (first_len - hlen > mtu ||
639 ((first_len - hlen) & 7) ||
641 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
644 skb_walk_frags(skb, frag) {
645 /* Correct geometry. */
646 if (frag->len > mtu ||
647 ((frag->len & 7) && frag->next) ||
648 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
649 goto slow_path_clean;
651 /* Partially cloned skb? */
652 if (skb_shared(frag))
653 goto slow_path_clean;
658 frag->destructor = sock_wfree;
660 skb->truesize -= frag->truesize;
667 *prevhdr = NEXTHDR_FRAGMENT;
668 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
673 frag = skb_shinfo(skb)->frag_list;
674 skb_frag_list_init(skb);
676 __skb_pull(skb, hlen);
677 fh = __skb_push(skb, sizeof(struct frag_hdr));
678 __skb_push(skb, hlen);
679 skb_reset_network_header(skb);
680 memcpy(skb_network_header(skb), tmp_hdr, hlen);
682 fh->nexthdr = nexthdr;
684 fh->frag_off = htons(IP6_MF);
685 fh->identification = frag_id;
687 first_len = skb_pagelen(skb);
688 skb->data_len = first_len - skb_headlen(skb);
689 skb->len = first_len;
690 ipv6_hdr(skb)->payload_len = htons(first_len -
691 sizeof(struct ipv6hdr));
694 /* Prepare header of the next frame,
695 * before previous one went down. */
697 frag->ip_summed = CHECKSUM_NONE;
698 skb_reset_transport_header(frag);
699 fh = __skb_push(frag, sizeof(struct frag_hdr));
700 __skb_push(frag, hlen);
701 skb_reset_network_header(frag);
702 memcpy(skb_network_header(frag), tmp_hdr,
704 offset += skb->len - hlen - sizeof(struct frag_hdr);
705 fh->nexthdr = nexthdr;
707 fh->frag_off = htons(offset);
709 fh->frag_off |= htons(IP6_MF);
710 fh->identification = frag_id;
711 ipv6_hdr(frag)->payload_len =
713 sizeof(struct ipv6hdr));
714 ip6_copy_metadata(frag, skb);
717 err = output(net, sk, skb);
719 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
720 IPSTATS_MIB_FRAGCREATES);
733 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
734 IPSTATS_MIB_FRAGOKS);
738 kfree_skb_list(frag);
740 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
741 IPSTATS_MIB_FRAGFAILS);
745 skb_walk_frags(skb, frag2) {
749 frag2->destructor = NULL;
750 skb->truesize += frag2->truesize;
755 left = skb->len - hlen; /* Space per frame */
756 ptr = hlen; /* Where to start from */
759 * Fragment the datagram.
762 troom = rt->dst.dev->needed_tailroom;
765 * Keep copying data until we run out.
768 u8 *fragnexthdr_offset;
771 /* IF: it doesn't fit, use 'mtu' - the data space left */
774 /* IF: we are not sending up to and including the packet end
775 then align the next start on an eight byte boundary */
780 /* Allocate buffer */
781 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
782 hroom + troom, GFP_ATOMIC);
789 * Set up data on packet
792 ip6_copy_metadata(frag, skb);
793 skb_reserve(frag, hroom);
794 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
795 skb_reset_network_header(frag);
796 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
797 frag->transport_header = (frag->network_header + hlen +
798 sizeof(struct frag_hdr));
801 * Charge the memory for the fragment to any owner
805 skb_set_owner_w(frag, skb->sk);
808 * Copy the packet header into the new buffer.
810 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
812 fragnexthdr_offset = skb_network_header(frag);
813 fragnexthdr_offset += prevhdr - skb_network_header(skb);
814 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
817 * Build fragment header.
819 fh->nexthdr = nexthdr;
821 fh->identification = frag_id;
824 * Copy a block of the IP datagram.
826 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
830 fh->frag_off = htons(offset);
832 fh->frag_off |= htons(IP6_MF);
833 ipv6_hdr(frag)->payload_len = htons(frag->len -
834 sizeof(struct ipv6hdr));
840 * Put this fragment into the sending queue.
842 err = output(net, sk, frag);
846 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
847 IPSTATS_MIB_FRAGCREATES);
849 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
850 IPSTATS_MIB_FRAGOKS);
855 if (skb->sk && dst_allfrag(skb_dst(skb)))
856 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
858 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
862 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
863 IPSTATS_MIB_FRAGFAILS);
868 static inline int ip6_rt_check(const struct rt6key *rt_key,
869 const struct in6_addr *fl_addr,
870 const struct in6_addr *addr_cache)
872 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
873 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
876 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
877 struct dst_entry *dst,
878 const struct flowi6 *fl6)
/* Validate a socket-cached dst against the flow @fl6; the visible tail
 * drops the cache when the route no longer matches the flow.
 * NOTE(review): excerpt elides the NULL-dst early return, the non-IPv6
 * release path and the final return of the original.
 */
880 struct ipv6_pinfo *np = inet6_sk(sk);
/* An IPv4-mapped (non-AF_INET6) dst can never be reused here. */
886 if (dst->ops->family != AF_INET6) {
891 rt = (struct rt6_info *)dst;
892 /* Yes, checking route validity in not connected
893 * case is not very simple. Take into account,
894 * that we do not support routing by source, TOS,
895 * and MSG_DONTROUTE --ANK (980726)
897 * 1. ip6_rt_check(): If route was host route,
898 * check that cached destination is current.
899 * If it is network route, we still may
900 * check its validity using saved pointer
901 * to the last used address: daddr_cache.
902 * We do not want to save whole address now,
903 * (because main consumer of this service
904 * is tcp, which has not this problem),
905 * so that the last trick works only on connected
907 * 2. oif also should be the same.
909 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
910 #ifdef CONFIG_IPV6_SUBTREES
911 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
913 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
914 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
923 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
924 struct dst_entry **dst, struct flowi6 *fl6)
/* Core of the IPv6 route lookup: pick a source address when the flow
 * has none, retry source-specific routing, optionally detour via the
 * default router for optimistic-DAD source addresses, and reject
 * v4-mapped-source / v6-destination mixtures.
 * NOTE(review): excerpt elides local declarations (err, flags, rt, n,
 * redirect), several braces and the out_err_release label body.
 */
926 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
933 /* The correct way to handle this would be to do
934 * ip6_route_get_saddr, and then ip6_route_output; however,
935 * the route-specific preferred source forces the
936 * ip6_route_output call _before_ ip6_route_get_saddr.
938 * In source specific routing (no src=any default route),
939 * ip6_route_output will fail given src=any saddr, though, so
940 * that's why we try it again later.
942 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
943 struct fib6_info *from;
945 bool had_dst = *dst != NULL;
948 *dst = ip6_route_output(net, sk, fl6);
949 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
952 from = rt ? rcu_dereference(rt->from) : NULL;
953 err = ip6_route_get_saddr(net, from, &fl6->daddr,
954 sk ? inet6_sk(sk)->srcprefs : 0,
959 goto out_err_release;
961 /* If we had an erroneous initial result, pretend it
962 * never existed and let the SA-enabled version take
965 if (!had_dst && (*dst)->error) {
971 flags |= RT6_LOOKUP_F_IFACE;
975 *dst = ip6_route_output_flags(net, sk, fl6, flags);
979 goto out_err_release;
981 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
983 * Here if the dst entry we've looked up
984 * has a neighbour entry that is in the INCOMPLETE
985 * state and the src address from the flow is
986 * marked as OPTIMISTIC, we release the found
987 * dst entry and replace it instead with the
988 * dst entry of the nexthop router
990 rt = (struct rt6_info *) *dst;
992 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
993 rt6_nexthop(rt, &fl6->daddr));
994 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
995 rcu_read_unlock_bh();
998 struct inet6_ifaddr *ifp;
999 struct flowi6 fl_gw6;
1002 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1005 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1011 * We need to get the dst entry for the
1012 * default router instead
1015 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1016 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1017 *dst = ip6_route_output(net, sk, &fl_gw6);
1018 err = (*dst)->error;
1020 goto out_err_release;
/* A v4-mapped source with a real v6 destination is unsupported. */
1024 if (ipv6_addr_v4mapped(&fl6->saddr) &&
1025 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1026 err = -EAFNOSUPPORT;
1027 goto out_err_release;
1036 if (err == -ENETUNREACH)
1037 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1042 * ip6_dst_lookup - perform route lookup on flow
1043 * @sk: socket which provides route info
1044 * @dst: pointer to dst_entry * for result
1045 * @fl6: flow to lookup
1047 * This function performs a route lookup on the given flow.
1049 * It returns zero on success, or a standard errno code on error.
1051 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
/* Thin wrapper: clears *dst then delegates to ip6_dst_lookup_tail().
 * NOTE(review): the fl6 parameter line and "*dst = NULL;" are elided in
 * this excerpt.
 */
1055 return ip6_dst_lookup_tail(net, sk, dst, fl6);
1057 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1060 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1061 * @sk: socket which provides route info
1062 * @fl6: flow to lookup
1063 * @final_dst: final destination address for ipsec lookup
1065 * This function performs a route lookup on the given flow.
1067 * It returns a valid dst pointer on success, or a pointer encoded
1070 struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1071 const struct in6_addr *final_dst)
/* Route lookup followed by an XFRM (IPsec) transformation lookup; the
 * flow's daddr is rewritten to @final_dst (when given) before the XFRM
 * pass so tunnels resolve against the true endpoint.
 */
1073 struct dst_entry *dst = NULL;
1076 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1078 return ERR_PTR(err);
1080 fl6->daddr = *final_dst;
1082 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1084 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1087 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1088 * @sk: socket which provides the dst cache and route info
1089 * @fl6: flow to lookup
1090 * @final_dst: final destination address for ipsec lookup
1091 * @connected: whether @sk is connected or not
1093 * This function performs a route lookup on the given flow with the
1094 * possibility of using the cached route in the socket if it is valid.
1095 * It will take the socket dst lock when operating on the dst cache.
1096 * As a result, this function can only be used in process context.
1098 * In addition, for a connected socket, cache the dst in the socket
1099 * if the current cache is not valid.
1101 * It returns a valid dst pointer on success, or a pointer encoded
1104 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1105 const struct in6_addr *final_dst,
/* NOTE(review): the "bool connected" parameter line and the early
 * "cache hit" return are elided in this excerpt.
 */
1108 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
/* Revalidate the cached dst against the flow before trusting it. */
1110 dst = ip6_sk_dst_check(sk, dst, fl6);
/* Cache miss: do a fresh lookup and, for connected sockets, store the
 * result back into the socket's dst cache.
 */
1114 dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1115 if (connected && !IS_ERR(dst))
1116 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1120 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1122 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
/* Duplicate an IPv6 option header; size is (hdrlen + 1) * 8 octets per
 * RFC 8200 extension-header length encoding.  NULL-safe.
 * NOTE(review): the gfp_t parameter line is elided in this excerpt.
 */
1125 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1128 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
/* Duplicate a routing header; same (hdrlen + 1) * 8 sizing as
 * ip6_opt_dup().  NULL-safe.
 * NOTE(review): the gfp_t parameter line is elided in this excerpt.
 */
1131 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1134 static void ip6_append_data_mtu(unsigned int *mtu,
/* Recompute the effective mtu/maxfraglen for ip6_append_data() once the
 * first fragment has been queued: only the first fragment reserves the
 * dst header_len.
 * NOTE(review): the maxfraglen parameter line and some branch lines are
 * elided in this excerpt.
 */
1136 unsigned int fragheaderlen,
1137 struct sk_buff *skb,
1138 struct rt6_info *rt,
1139 unsigned int orig_mtu)
1141 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1143 /* first fragment, reserve header_len */
1144 *mtu = orig_mtu - rt->dst.header_len;
1148 * this fragment is not first, the headers
1149 * space is regarded as data space.
/* maxfraglen: largest 8-byte-aligned payload plus headers, minus the
 * fragment header that every subsequent fragment must carry.
 */
1153 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1154 + fragheaderlen - sizeof(struct frag_hdr);
1158 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1159 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1160 struct rt6_info *rt, struct flowi6 *fl6)
/* Initialise the cork state for ip6_append_data(): deep-copy the tx
 * options, take over the route, and compute the fragment size from
 * PMTU discovery mode / per-socket frag_size.
 * NOTE(review): excerpt elides braces, the dst_hold/error returns and
 * parts of the option-copy error handling of the original.
 */
1162 struct ipv6_pinfo *np = inet6_sk(sk);
1164 struct ipv6_txoptions *opt = ipc6->opt;
/* Options may only be installed once per cork cycle. */
1170 if (WARN_ON(v6_cork->opt))
1173 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1174 if (unlikely(!v6_cork->opt))
1177 v6_cork->opt->tot_len = sizeof(*opt);
1178 v6_cork->opt->opt_flen = opt->opt_flen;
1179 v6_cork->opt->opt_nflen = opt->opt_nflen;
/* Each sub-option is duplicated; a NULL result after a non-NULL source
 * means the allocation failed.
 */
1181 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1183 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1186 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1188 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1191 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1193 if (opt->hopopt && !v6_cork->opt->hopopt)
1196 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1198 if (opt->srcrt && !v6_cork->opt->srcrt)
1201 /* need source address above miyazawa*/
1204 cork->base.dst = &rt->dst;
1205 cork->fl.u.ip6 = *fl6;
1206 v6_cork->hop_limit = ipc6->hlimit;
1207 v6_cork->tclass = ipc6->tclass;
/* MTU source depends on XFRM tunnelling and the PMTU-discovery mode:
 * PROBE uses the raw device MTU, otherwise the (path) dst MTU.
 */
1208 if (rt->dst.flags & DST_XFRM_TUNNEL)
1209 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1210 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1212 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1213 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1214 if (np->frag_size < mtu) {
1216 mtu = np->frag_size;
1218 if (mtu < IPV6_MIN_MTU)
1220 cork->base.fragsize = mtu;
/* GSO only applies to datagram sockets. */
1221 cork->base.gso_size = sk->sk_type == SOCK_DGRAM ? ipc6->gso_size : 0;
1223 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1224 cork->base.flags |= IPCORK_ALLFRAG;
1225 cork->base.length = 0;
1230 static int __ip6_append_data(struct sock *sk,
1232 struct sk_buff_head *queue,
1233 struct inet_cork *cork,
1234 struct inet6_cork *v6_cork,
1235 struct page_frag *pfrag,
1236 int getfrag(void *from, char *to, int offset,
1237 int len, int odd, struct sk_buff *skb),
1238 void *from, int length, int transhdrlen,
1239 unsigned int flags, struct ipcm6_cookie *ipc6,
1240 const struct sockcm_cookie *sockc)
1242 struct sk_buff *skb, *skb_prev = NULL;
1243 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1245 int dst_exthdrlen = 0;
1252 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1253 struct ipv6_txoptions *opt = v6_cork->opt;
1254 int csummode = CHECKSUM_NONE;
1255 unsigned int maxnonfragsize, headersize;
1256 unsigned int wmem_alloc_delta = 0;
1259 skb = skb_peek_tail(queue);
1261 exthdrlen = opt ? opt->opt_flen : 0;
1262 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1265 paged = !!cork->gso_size;
1266 mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1269 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1271 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1272 (opt ? opt->opt_nflen : 0);
1273 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1274 sizeof(struct frag_hdr);
1276 headersize = sizeof(struct ipv6hdr) +
1277 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1278 (dst_allfrag(&rt->dst) ?
1279 sizeof(struct frag_hdr) : 0) +
1280 rt->rt6i_nfheader_len;
1282 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1283 * the first fragment
1285 if (headersize + transhdrlen > mtu)
1288 if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1289 (sk->sk_protocol == IPPROTO_UDP ||
1290 sk->sk_protocol == IPPROTO_RAW)) {
1291 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1292 sizeof(struct ipv6hdr));
1296 if (ip6_sk_ignore_df(sk))
1297 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1299 maxnonfragsize = mtu;
1301 if (cork->length + length > maxnonfragsize - headersize) {
1303 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1304 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1308 /* CHECKSUM_PARTIAL only with no extension headers and when
1309 * we are not going to fragment
1311 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1312 headersize == sizeof(struct ipv6hdr) &&
1313 length <= mtu - headersize &&
1314 (!(flags & MSG_MORE) || cork->gso_size) &&
1315 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1316 csummode = CHECKSUM_PARTIAL;
1318 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
1319 sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
1320 if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1321 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1322 tskey = sk->sk_tskey++;
1326 * Let's try using as much space as possible.
1327 * Use MTU if total length of the message fits into the MTU.
1328 * Otherwise, we need to reserve fragment header and
1329 * fragment alignment (= 8-15 octects, in total).
1331 * Note that we may need to "move" the data from the tail of
1332 * of the buffer to the new fragment when we split
1335 * FIXME: It may be fragmented into multiple chunks
1336 * at once if non-fragmentable extension headers
1341 cork->length += length;
1345 while (length > 0) {
1346 /* Check if the remaining data fits into current packet. */
1347 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1349 copy = maxfraglen - skb->len;
1353 unsigned int datalen;
1354 unsigned int fraglen;
1355 unsigned int fraggap;
1356 unsigned int alloclen;
1357 unsigned int pagedlen = 0;
1359 /* There's no room in the current skb */
1361 fraggap = skb->len - maxfraglen;
1364 /* update mtu and maxfraglen if necessary */
1365 if (!skb || !skb_prev)
1366 ip6_append_data_mtu(&mtu, &maxfraglen,
1367 fragheaderlen, skb, rt,
1373 * If remaining data exceeds the mtu,
1374 * we know we need more fragment(s).
1376 datalen = length + fraggap;
1378 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1379 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1380 fraglen = datalen + fragheaderlen;
1382 if ((flags & MSG_MORE) &&
1383 !(rt->dst.dev->features&NETIF_F_SG))
1388 alloclen = min_t(int, fraglen, MAX_HEADER);
1389 pagedlen = fraglen - alloclen;
1392 alloclen += dst_exthdrlen;
1394 if (datalen != length + fraggap) {
1396 * this is not the last fragment, the trailer
1397 * space is regarded as data space.
1399 datalen += rt->dst.trailer_len;
1402 alloclen += rt->dst.trailer_len;
1403 fraglen = datalen + fragheaderlen;
1406 * We just reserve space for fragment header.
1407 * Note: this may be overallocation if the message
1408 * (without MSG_MORE) fits into the MTU.
1410 alloclen += sizeof(struct frag_hdr);
1412 copy = datalen - transhdrlen - fraggap - pagedlen;
1418 skb = sock_alloc_send_skb(sk,
1420 (flags & MSG_DONTWAIT), &err);
1423 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1425 skb = alloc_skb(alloclen + hh_len,
1433 * Fill in the control structures
1435 skb->protocol = htons(ETH_P_IPV6);
1436 skb->ip_summed = csummode;
1438 /* reserve for fragmentation and ipsec header */
1439 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1442 /* Only the initial fragment is time stamped */
1443 skb_shinfo(skb)->tx_flags = tx_flags;
1445 skb_shinfo(skb)->tskey = tskey;
1449 * Find where to start putting bytes
1451 data = skb_put(skb, fraglen - pagedlen);
1452 skb_set_network_header(skb, exthdrlen);
1453 data += fragheaderlen;
1454 skb->transport_header = (skb->network_header +
1457 skb->csum = skb_copy_and_csum_bits(
1458 skb_prev, maxfraglen,
1459 data + transhdrlen, fraggap, 0);
1460 skb_prev->csum = csum_sub(skb_prev->csum,
1463 pskb_trim_unique(skb_prev, maxfraglen);
1466 getfrag(from, data + transhdrlen, offset,
1467 copy, fraggap, skb) < 0) {
1474 length -= copy + transhdrlen;
1479 if ((flags & MSG_CONFIRM) && !skb_prev)
1480 skb_set_dst_pending_confirm(skb, 1);
1483 * Put the packet on the pending queue
1485 if (!skb->destructor) {
1486 skb->destructor = sock_wfree;
1488 wmem_alloc_delta += skb->truesize;
1490 __skb_queue_tail(queue, skb);
1497 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1498 skb_tailroom(skb) >= copy) {
1502 if (getfrag(from, skb_put(skb, copy),
1503 offset, copy, off, skb) < 0) {
1504 __skb_trim(skb, off);
1509 int i = skb_shinfo(skb)->nr_frags;
1512 if (!sk_page_frag_refill(sk, pfrag))
1515 if (!skb_can_coalesce(skb, i, pfrag->page,
1518 if (i == MAX_SKB_FRAGS)
1521 __skb_fill_page_desc(skb, i, pfrag->page,
1523 skb_shinfo(skb)->nr_frags = ++i;
1524 get_page(pfrag->page);
1526 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1528 page_address(pfrag->page) + pfrag->offset,
1529 offset, copy, skb->len, skb) < 0)
1532 pfrag->offset += copy;
1533 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1535 skb->data_len += copy;
1536 skb->truesize += copy;
1537 wmem_alloc_delta += copy;
1543 if (wmem_alloc_delta)
1544 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1550 cork->length -= length;
1551 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1552 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1556 int ip6_append_data(struct sock *sk,
1557 int getfrag(void *from, char *to, int offset, int len,
1558 int odd, struct sk_buff *skb),
1559 void *from, int length, int transhdrlen,
1560 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1561 struct rt6_info *rt, unsigned int flags,
1562 const struct sockcm_cookie *sockc)
1564 struct inet_sock *inet = inet_sk(sk);
1565 struct ipv6_pinfo *np = inet6_sk(sk);
1569 if (flags&MSG_PROBE)
1571 if (skb_queue_empty(&sk->sk_write_queue)) {
1575 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1580 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1581 length += exthdrlen;
1582 transhdrlen += exthdrlen;
1584 fl6 = &inet->cork.fl.u.ip6;
1588 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1589 &np->cork, sk_page_frag(sk), getfrag,
1590 from, length, transhdrlen, flags, ipc6, sockc);
1592 EXPORT_SYMBOL_GPL(ip6_append_data);
1594 static void ip6_cork_release(struct inet_cork_full *cork,
1595 struct inet6_cork *v6_cork)
1598 kfree(v6_cork->opt->dst0opt);
1599 kfree(v6_cork->opt->dst1opt);
1600 kfree(v6_cork->opt->hopopt);
1601 kfree(v6_cork->opt->srcrt);
1602 kfree(v6_cork->opt);
1603 v6_cork->opt = NULL;
1606 if (cork->base.dst) {
1607 dst_release(cork->base.dst);
1608 cork->base.dst = NULL;
1609 cork->base.flags &= ~IPCORK_ALLFRAG;
1611 memset(&cork->fl, 0, sizeof(cork->fl));
1614 struct sk_buff *__ip6_make_skb(struct sock *sk,
1615 struct sk_buff_head *queue,
1616 struct inet_cork_full *cork,
1617 struct inet6_cork *v6_cork)
1619 struct sk_buff *skb, *tmp_skb;
1620 struct sk_buff **tail_skb;
1621 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1622 struct ipv6_pinfo *np = inet6_sk(sk);
1623 struct net *net = sock_net(sk);
1624 struct ipv6hdr *hdr;
1625 struct ipv6_txoptions *opt = v6_cork->opt;
1626 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1627 struct flowi6 *fl6 = &cork->fl.u.ip6;
1628 unsigned char proto = fl6->flowi6_proto;
1630 skb = __skb_dequeue(queue);
1633 tail_skb = &(skb_shinfo(skb)->frag_list);
1635 /* move skb->data to ip header from ext header */
1636 if (skb->data < skb_network_header(skb))
1637 __skb_pull(skb, skb_network_offset(skb));
1638 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1639 __skb_pull(tmp_skb, skb_network_header_len(skb));
1640 *tail_skb = tmp_skb;
1641 tail_skb = &(tmp_skb->next);
1642 skb->len += tmp_skb->len;
1643 skb->data_len += tmp_skb->len;
1644 skb->truesize += tmp_skb->truesize;
1645 tmp_skb->destructor = NULL;
1649 /* Allow local fragmentation. */
1650 skb->ignore_df = ip6_sk_ignore_df(sk);
1652 *final_dst = fl6->daddr;
1653 __skb_pull(skb, skb_network_header_len(skb));
1654 if (opt && opt->opt_flen)
1655 ipv6_push_frag_opts(skb, opt, &proto);
1656 if (opt && opt->opt_nflen)
1657 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1659 skb_push(skb, sizeof(struct ipv6hdr));
1660 skb_reset_network_header(skb);
1661 hdr = ipv6_hdr(skb);
1663 ip6_flow_hdr(hdr, v6_cork->tclass,
1664 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1665 ip6_autoflowlabel(net, np), fl6));
1666 hdr->hop_limit = v6_cork->hop_limit;
1667 hdr->nexthdr = proto;
1668 hdr->saddr = fl6->saddr;
1669 hdr->daddr = *final_dst;
1671 skb->priority = sk->sk_priority;
1672 skb->mark = sk->sk_mark;
1674 skb_dst_set(skb, dst_clone(&rt->dst));
1675 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1676 if (proto == IPPROTO_ICMPV6) {
1677 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1679 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1680 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1683 ip6_cork_release(cork, v6_cork);
1688 int ip6_send_skb(struct sk_buff *skb)
1690 struct net *net = sock_net(skb->sk);
1691 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1694 err = ip6_local_out(net, skb->sk, skb);
1697 err = net_xmit_errno(err);
1699 IP6_INC_STATS(net, rt->rt6i_idev,
1700 IPSTATS_MIB_OUTDISCARDS);
/*
 * Finalize and transmit the socket's pending (corked) fragments.
 * A NULL from ip6_finish_skb() means there was nothing queued;
 * that is not an error.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1718 static void __ip6_flush_pending_frames(struct sock *sk,
1719 struct sk_buff_head *queue,
1720 struct inet_cork_full *cork,
1721 struct inet6_cork *v6_cork)
1723 struct sk_buff *skb;
1725 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1727 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1728 IPSTATS_MIB_OUTDISCARDS);
1732 ip6_cork_release(cork, v6_cork);
1735 void ip6_flush_pending_frames(struct sock *sk)
1737 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1738 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1740 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1742 struct sk_buff *ip6_make_skb(struct sock *sk,
1743 int getfrag(void *from, char *to, int offset,
1744 int len, int odd, struct sk_buff *skb),
1745 void *from, int length, int transhdrlen,
1746 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1747 struct rt6_info *rt, unsigned int flags,
1748 struct inet_cork_full *cork,
1749 const struct sockcm_cookie *sockc)
1751 struct inet6_cork v6_cork;
1752 struct sk_buff_head queue;
1753 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1756 if (flags & MSG_PROBE)
1759 __skb_queue_head_init(&queue);
1761 cork->base.flags = 0;
1762 cork->base.addr = 0;
1763 cork->base.opt = NULL;
1764 cork->base.dst = NULL;
1766 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1768 ip6_cork_release(cork, &v6_cork);
1769 return ERR_PTR(err);
1771 if (ipc6->dontfrag < 0)
1772 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1774 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1775 ¤t->task_frag, getfrag, from,
1776 length + exthdrlen, transhdrlen + exthdrlen,
1777 flags, ipc6, sockc);
1779 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1780 return ERR_PTR(err);
1783 return __ip6_make_skb(sk, &queue, cork, &v6_cork);