/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) output module.
 *
 * Version:	$Id: ip_output.c,v 1.99.2.1 2002/03/10 04:26:08 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <Alan.Cox@linux.org>
 *		Stefan Becker, <stefanb@yello.ping.de>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *
 *	See ip_input.c for original log
 *
 *	Fixes:
 *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
 *		Mike Kilburn	:	htons() missing in ip_build_xmit.
 *		Bradford Johnson:	Fix faulty handling of some frames when
 *					no route is found.
 *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
 *					(in case if packet not accepted by
 *					output firewall rules)
 *		Mike McLagan	:	Routing by source
 *		Alexey Kuznetsov:	use new route cache
 *		Andi Kleen:		Fix broken PMTU recovery and remove
 *					some redundant tests.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *		Andi Kleen	: 	Replace ip_reply with ip_send_reply.
 *		Andi Kleen	:	Split fast and slow ip_build_xmit path
 *					for decreased register pressure on x86
 *					and more readability.
 *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
 *					silently drop skb instead of failing with -EPERM.
 *		Detlev Wengorz	:	Copy protocol for fragments.
 */
43 #include <asm/uaccess.h>
44 #include <asm/system.h>
45 #include <linux/types.h>
46 #include <linux/kernel.h>
47 #include <linux/sched.h>
49 #include <linux/string.h>
50 #include <linux/errno.h>
51 #include <linux/config.h>
53 #include <linux/socket.h>
54 #include <linux/sockios.h>
56 #include <linux/inet.h>
57 #include <linux/netdevice.h>
58 #include <linux/etherdevice.h>
59 #include <linux/proc_fs.h>
60 #include <linux/stat.h>
61 #include <linux/init.h>
65 #include <net/protocol.h>
66 #include <net/route.h>
69 #include <linux/skbuff.h>
74 #include <net/checksum.h>
75 #include <net/inetpeer.h>
76 #include <linux/igmp.h>
77 #include <linux/netfilter_ipv4.h>
78 #include <linux/mroute.h>
79 #include <linux/netlink.h>
82 * Shall we try to damage output packets if routing dev changes?
85 int sysctl_ip_dynaddr = 0;
86 int sysctl_ip_default_ttl = IPDEFTTL;
88 /* Generate a checksum for an outgoing IP datagram. */
89 __inline__ void ip_send_check(struct iphdr *iph)
92 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
95 /* dev_loopback_xmit for use with netfilter. */
96 static int ip_dev_loopback_xmit(struct sk_buff *newskb)
98 newskb->mac.raw = newskb->data;
99 __skb_pull(newskb, newskb->nh.raw - newskb->data);
100 newskb->pkt_type = PACKET_LOOPBACK;
101 newskb->ip_summed = CHECKSUM_UNNECESSARY;
102 BUG_TRAP(newskb->dst);
104 #ifdef CONFIG_NETFILTER_DEBUG
105 nf_debug_ip_loopback_xmit(newskb);
111 /* Don't just hand NF_HOOK skb->dst->output, in case netfilter hook
114 output_maybe_reroute(struct sk_buff *skb)
116 return skb->dst->output(skb);
120 * Add an ip header to a skbuff and send it out.
122 int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
123 u32 saddr, u32 daddr, struct ip_options *opt)
125 struct rtable *rt = (struct rtable *)skb->dst;
128 /* Build the IP header. */
130 iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
132 iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
136 iph->tos = sk->protinfo.af_inet.tos;
137 if (ip_dont_fragment(sk, &rt->u.dst))
138 iph->frag_off = htons(IP_DF);
141 iph->ttl = sk->protinfo.af_inet.ttl;
142 iph->daddr = rt->rt_dst;
143 iph->saddr = rt->rt_src;
144 iph->protocol = sk->protocol;
145 iph->tot_len = htons(skb->len);
146 ip_select_ident(iph, &rt->u.dst, sk);
149 if (opt && opt->optlen) {
150 iph->ihl += opt->optlen>>2;
151 ip_options_build(skb, opt, daddr, rt, 0);
156 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
157 output_maybe_reroute);
160 static inline int ip_finish_output2(struct sk_buff *skb)
162 struct dst_entry *dst = skb->dst;
163 struct hh_cache *hh = dst->hh;
165 #ifdef CONFIG_NETFILTER_DEBUG
166 nf_debug_ip_finish_output2(skb);
167 #endif /*CONFIG_NETFILTER_DEBUG*/
172 read_lock_bh(&hh->hh_lock);
173 hh_alen = HH_DATA_ALIGN(hh->hh_len);
174 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
175 read_unlock_bh(&hh->hh_lock);
176 skb_push(skb, hh->hh_len);
177 return hh->hh_output(skb);
178 } else if (dst->neighbour)
179 return dst->neighbour->output(skb);
182 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
187 static __inline__ int __ip_finish_output(struct sk_buff *skb)
189 struct net_device *dev = skb->dst->dev;
192 skb->protocol = htons(ETH_P_IP);
194 return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
/* Exported wrapper around __ip_finish_output for non-inline callers. */
int ip_finish_output(struct sk_buff *skb)
{
	return __ip_finish_output(skb);
}
203 int ip_mc_output(struct sk_buff *skb)
205 struct sock *sk = skb->sk;
206 struct rtable *rt = (struct rtable*)skb->dst;
207 struct net_device *dev = rt->u.dst.dev;
210 * If the indicated interface is up and running, send the packet.
212 IP_INC_STATS(IpOutRequests);
213 #ifdef CONFIG_IP_ROUTE_NAT
214 if (rt->rt_flags & RTCF_NAT)
219 skb->protocol = htons(ETH_P_IP);
222 * Multicasts are looped back for other local users
225 if (rt->rt_flags&RTCF_MULTICAST) {
226 if ((!sk || sk->protinfo.af_inet.mc_loop)
227 #ifdef CONFIG_IP_MROUTE
228 /* Small optimization: do not loopback not local frames,
229 which returned after forwarding; they will be dropped
230 by ip_mr_input in any case.
231 Note, that local frames are looped back to be delivered
234 This check is duplicated in ip_mr_input at the moment.
236 && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
239 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
241 NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
243 ip_dev_loopback_xmit);
246 /* Multicasts with ttl 0 must not go beyond the host */
248 if (skb->nh.iph->ttl == 0) {
254 if (rt->rt_flags&RTCF_BROADCAST) {
255 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
257 NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
258 newskb->dev, ip_dev_loopback_xmit);
261 return __ip_finish_output(skb);
264 int ip_output(struct sk_buff *skb)
266 #ifdef CONFIG_IP_ROUTE_NAT
267 struct rtable *rt = (struct rtable*)skb->dst;
270 IP_INC_STATS(IpOutRequests);
272 #ifdef CONFIG_IP_ROUTE_NAT
273 if (rt->rt_flags&RTCF_NAT)
277 return __ip_finish_output(skb);
280 /* Queues a packet to be sent, and starts the transmitter if necessary.
281 * This routine also needs to put in the total length and compute the
282 * checksum. We use to do this in two stages, ip_build_header() then
283 * this, but that scheme created a mess when routes disappeared etc.
284 * So we do it all here, and the TCP send engine has been changed to
285 * match. (No more unroutable FIN disasters, etc. wheee...) This will
286 * most likely make other reliable transport layers above IP easier
287 * to implement under Linux.
289 static inline int ip_queue_xmit2(struct sk_buff *skb)
291 struct sock *sk = skb->sk;
292 struct rtable *rt = (struct rtable *)skb->dst;
293 struct net_device *dev;
294 struct iphdr *iph = skb->nh.iph;
298 /* This can happen when the transport layer has segments queued
299 * with a cached route, and by the time we get here things are
300 * re-routed to a device with a different MTU than the original
301 * device. Sick, but we must cover it.
303 if (skb_headroom(skb) < dev->hard_header_len && dev->hard_header) {
304 struct sk_buff *skb2;
306 skb2 = skb_realloc_headroom(skb, (dev->hard_header_len + 15) & ~15);
311 skb_set_owner_w(skb2, sk);
316 if (skb->len > rt->u.dst.pmtu)
319 ip_select_ident(iph, &rt->u.dst, sk);
321 /* Add an IP checksum. */
324 skb->priority = sk->priority;
325 return skb->dst->output(skb);
328 if (ip_dont_fragment(sk, &rt->u.dst)) {
329 /* Reject packet ONLY if TCP might fragment
330 * it itself, if were careful enough.
332 NETDEBUG(printk(KERN_DEBUG "sending pkt_too_big (len[%u] pmtu[%u]) to self\n",
333 skb->len, rt->u.dst.pmtu));
335 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
336 htonl(rt->u.dst.pmtu));
340 ip_select_ident(iph, &rt->u.dst, sk);
341 if (skb->ip_summed == CHECKSUM_HW &&
342 (skb = skb_checksum_help(skb)) == NULL)
344 return ip_fragment(skb, skb->dst->output);
347 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
349 struct sock *sk = skb->sk;
350 struct ip_options *opt = sk->protinfo.af_inet.opt;
354 /* Skip all of this if the packet is already routed,
355 * f.e. by something like SCTP.
357 rt = (struct rtable *) skb->dst;
361 /* Make sure we can route this packet. */
362 rt = (struct rtable *)__sk_dst_check(sk, 0);
366 /* Use correct destination address if we have options. */
371 /* If this fails, retransmit mechanism of transport layer will
372 * keep trying until route appears or the connection times itself
375 if (ip_route_output(&rt, daddr, sk->saddr,
379 __sk_dst_set(sk, &rt->u.dst);
380 sk->route_caps = rt->u.dst.dev->features;
382 skb->dst = dst_clone(&rt->u.dst);
385 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
388 /* OK, we know where to send it, allocate and build IP header. */
389 iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
390 *((__u16 *)iph) = htons((4 << 12) | (5 << 8) | (sk->protinfo.af_inet.tos & 0xff));
391 iph->tot_len = htons(skb->len);
392 if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
393 iph->frag_off = htons(IP_DF);
396 iph->ttl = sk->protinfo.af_inet.ttl;
397 iph->protocol = sk->protocol;
398 iph->saddr = rt->rt_src;
399 iph->daddr = rt->rt_dst;
401 /* Transport layer set skb->h.foo itself. */
403 if(opt && opt->optlen) {
404 iph->ihl += opt->optlen >> 2;
405 ip_options_build(skb, opt, sk->daddr, rt, 0);
408 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
412 IP_INC_STATS(IpOutNoRoutes);
414 return -EHOSTUNREACH;
418 * Build and send a packet, with as little as one copy
420 * Doesn't care much about ip options... option length can be
421 * different for fragment at 0 and other fragments.
423 * Note that the fragment at the highest offset is sent first,
424 * so the getfrag routine can fill in the TCP/UDP checksum header
425 * field in the last fragment it sends... actually it also helps
426 * the reassemblers, they can put most packets in at the head of
427 * the fragment queue, and they know the total size in advance. This
428 * last feature will measurably improve the Linux fragment handler one
431 * The callback has five args, an arbitrary pointer (copy of frag),
432 * the source IP address (may depend on the routing table), the
433 * destination address (char *), the offset to copy from, and the
434 * length to be copied.
437 static int ip_build_xmit_slow(struct sock *sk,
438 int getfrag (const void *,
445 struct ipcm_cookie *ipc,
449 unsigned int fraglen, maxfraglen, fragheaderlen;
455 int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
457 struct ip_options *opt = ipc->opt;
460 mtu = rt->u.dst.pmtu;
461 if (ip_dont_fragment(sk, &rt->u.dst))
464 length -= sizeof(struct iphdr);
467 fragheaderlen = sizeof(struct iphdr) + opt->optlen;
468 maxfraglen = ((mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
470 fragheaderlen = sizeof(struct iphdr);
473 * Fragheaderlen is the size of 'overhead' on each buffer. Now work
474 * out the size of the frames to send.
477 maxfraglen = ((mtu-sizeof(struct iphdr)) & ~7) + fragheaderlen;
480 if (length + fragheaderlen > 0xFFFF) {
481 ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
486 * Start at the end of the frame by handling the remainder.
489 offset = length - (length % (maxfraglen - fragheaderlen));
492 * Amount of memory to allocate for final fragment.
495 fraglen = length - offset + fragheaderlen;
497 if (length-offset==0) {
498 fraglen = maxfraglen;
499 offset -= maxfraglen-fragheaderlen;
503 * The last fragment will not have MF (more fragments) set.
509 * Don't fragment packets for path mtu discovery.
512 if (offset > 0 && sk->protinfo.af_inet.pmtudisc==IP_PMTUDISC_DO) {
513 ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
520 * Begin outputting the bytes.
523 id = sk->protinfo.af_inet.id++;
527 struct sk_buff * skb;
530 * Get the memory we require with some space left for alignment.
532 if (!(flags & MSG_DONTWAIT) || nfrags == 0) {
533 skb = sock_alloc_send_skb(sk, fraglen + hh_len + 15,
534 (flags & MSG_DONTWAIT), &err);
536 /* On a non-blocking write, we check for send buffer
537 * usage on the first fragment only.
539 skb = sock_wmalloc(sk, fraglen + hh_len + 15, 1,
548 * Fill in the control structures
551 skb->priority = sk->priority;
552 skb->dst = dst_clone(&rt->u.dst);
553 skb_reserve(skb, hh_len);
556 * Find where to start putting bytes.
559 data = skb_put(skb, fraglen);
560 skb->nh.iph = (struct iphdr *)data;
563 * Only write IP header onto non-raw packets
567 struct iphdr *iph = (struct iphdr *)data;
572 iph->ihl += opt->optlen>>2;
573 ip_options_build(skb, opt,
574 ipc->addr, rt, offset);
576 iph->tos = sk->protinfo.af_inet.tos;
577 iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
578 iph->frag_off = htons(offset>>3)|mf|df;
582 /* Select an unpredictable ident only
583 * for packets without DF or having
586 __ip_select_ident(iph, &rt->u.dst);
591 * Any further fragments will have MF set.
595 if (rt->rt_type == RTN_MULTICAST)
596 iph->ttl = sk->protinfo.af_inet.mc_ttl;
598 iph->ttl = sk->protinfo.af_inet.ttl;
599 iph->protocol = sk->protocol;
601 iph->saddr = rt->rt_src;
602 iph->daddr = rt->rt_dst;
603 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
611 if (getfrag(frag, data, offset, fraglen-fragheaderlen, skb)) {
617 offset -= (maxfraglen-fragheaderlen);
618 fraglen = maxfraglen;
622 err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
623 skb->dst->dev, output_maybe_reroute);
626 err = sk->protinfo.af_inet.recverr ? net_xmit_errno(err) : 0;
630 } while (offset >= 0);
633 ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
638 IP_INC_STATS(IpOutDiscards);
640 ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
645 * Fast path for unfragmented packets.
647 int ip_build_xmit(struct sock *sk,
648 int getfrag (const void *,
655 struct ipcm_cookie *ipc,
665 * Try the simple case first. This leaves fragmented frames, and by
666 * choice RAW frames within 20 bytes of maximum size(rare) to the long path
669 if (!sk->protinfo.af_inet.hdrincl) {
670 length += sizeof(struct iphdr);
673 * Check for slow path.
675 if (length > rt->u.dst.pmtu || ipc->opt != NULL)
676 return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags);
678 if (length > rt->u.dst.dev->mtu) {
679 ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, rt->u.dst.dev->mtu);
687 * Do path mtu discovery if needed.
690 if (ip_dont_fragment(sk, &rt->u.dst))
694 * Fast path for unfragmented frames without options.
697 int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
699 skb = sock_alloc_send_skb(sk, length+hh_len+15,
700 flags&MSG_DONTWAIT, &err);
703 skb_reserve(skb, hh_len);
706 skb->priority = sk->priority;
707 skb->dst = dst_clone(&rt->u.dst);
709 skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
711 if(!sk->protinfo.af_inet.hdrincl) {
714 iph->tos=sk->protinfo.af_inet.tos;
715 iph->tot_len = htons(length);
717 iph->ttl=sk->protinfo.af_inet.mc_ttl;
718 ip_select_ident(iph, &rt->u.dst, sk);
719 if (rt->rt_type != RTN_MULTICAST)
720 iph->ttl=sk->protinfo.af_inet.ttl;
721 iph->protocol=sk->protocol;
722 iph->saddr=rt->rt_src;
723 iph->daddr=rt->rt_dst;
725 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
726 err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4, skb);
729 err = getfrag(frag, (void *)iph, 0, length, skb);
734 err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
735 output_maybe_reroute);
737 err = sk->protinfo.af_inet.recverr ? net_xmit_errno(err) : 0;
747 IP_INC_STATS(IpOutDiscards);
752 * This IP datagram is too large to be sent in one piece. Break it up into
753 * smaller pieces (each of size equal to IP header plus
754 * a block of the data of the original IP data part) that will yet fit in a
755 * single device frame, and queue such a frame for sending.
757 * Yes this is inefficient, feel free to submit a quicker one.
760 int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
765 struct net_device *dev;
766 struct sk_buff *skb2;
767 unsigned int mtu, hlen, left, len;
770 struct rtable *rt = (struct rtable*)skb->dst;
776 * Point into the IP datagram header.
782 * Setup starting values.
786 left = skb->len - hlen; /* Space per frame */
787 mtu = rt->u.dst.pmtu - hlen; /* Size of data space */
788 ptr = raw + hlen; /* Where to start from */
791 * Fragment the datagram.
794 offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
795 not_last_frag = iph->frag_off & htons(IP_MF);
798 * Keep copying data until we run out.
803 /* IF: it doesn't fit, use 'mtu' - the data space left */
806 /* IF: we are not sending upto and including the packet end
807 then align the next start on an eight byte boundary */
815 if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) {
816 NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
822 * Set up data on packet
825 skb2->pkt_type = skb->pkt_type;
826 skb2->priority = skb->priority;
827 skb_reserve(skb2, (dev->hard_header_len+15)&~15);
828 skb_put(skb2, len + hlen);
829 skb2->nh.raw = skb2->data;
830 skb2->h.raw = skb2->data + hlen;
831 skb2->protocol = skb->protocol;
832 skb2->security = skb->security;
835 * Charge the memory for the fragment to any owner
840 skb_set_owner_w(skb2, skb->sk);
841 skb2->dst = dst_clone(skb->dst);
842 skb2->dev = skb->dev;
845 * Copy the packet header into the new buffer.
848 memcpy(skb2->nh.raw, skb->data, hlen);
851 * Copy a block of the IP datagram.
853 if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
858 * Fill in the new header fields.
861 iph->frag_off = htons((offset >> 3));
863 /* ANK: dirty, but effective trick. Upgrade options only if
864 * the segment to be fragmented was THE FIRST (otherwise,
865 * options are already fixed) and make it ONCE
866 * on the initial skb, so that all the following fragments
867 * will inherit fixed options.
870 ip_options_fragment(skb);
872 /* Copy the flags to each fragment. */
873 IPCB(skb2)->flags = IPCB(skb)->flags;
876 * Added AC : If we are fragmenting a fragment that's not the
877 * last fragment then keep MF on each bit
879 if (left > 0 || not_last_frag)
880 iph->frag_off |= htons(IP_MF);
884 #ifdef CONFIG_NET_SCHED
885 skb2->tc_index = skb->tc_index;
887 #ifdef CONFIG_NETFILTER
888 skb2->nfmark = skb->nfmark;
889 skb2->nfcache = skb->nfcache;
890 /* Connection association is same as pre-frag packet */
891 skb2->nfct = skb->nfct;
892 nf_conntrack_get(skb2->nfct);
893 #ifdef CONFIG_NETFILTER_DEBUG
894 skb2->nf_debug = skb->nf_debug;
899 * Put this fragment into the sending queue.
902 IP_INC_STATS(IpFragCreates);
904 iph->tot_len = htons(len + hlen);
913 IP_INC_STATS(IpFragOKs);
918 IP_INC_STATS(IpFragFails);
923 * Fetch data from kernel space and fill in checksum if needed.
925 static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset,
926 unsigned int fraglen, struct sk_buff *skb)
928 struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr;
929 u16 *pktp = (u16 *)to;
935 if (offset >= iov->iov_len) {
936 offset -= iov->iov_len;
940 len = iov->iov_len - offset;
941 if (fraglen > len) { /* overlapping. */
942 dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len,
950 dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen,
953 if (hdrflag && dp->csumoffset)
954 *(pktp + dp->csumoffset) = csum_fold(dp->csum); /* fill in checksum */
959 * Generic function to send a packet as reply to another packet.
960 * Used to send TCP resets so far. ICMP should use this function too.
962 * Should run single threaded per socket because it uses the sock
963 * structure to pass arguments.
965 void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
969 struct ip_options opt;
972 struct ipcm_cookie ipc;
974 struct rtable *rt = (struct rtable*)skb->dst;
976 if (ip_options_echo(&replyopts.opt, skb))
979 daddr = ipc.addr = rt->rt_src;
982 if (replyopts.opt.optlen) {
983 ipc.opt = &replyopts.opt;
986 daddr = replyopts.opt.faddr;
989 if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
992 /* And let IP do all the hard work.
994 This chunk is not reenterable, hence spinlock.
995 Note that it uses the fact, that this function is called
996 with locally disabled BH and that sk cannot be already spinlocked.
999 sk->protinfo.af_inet.tos = skb->nh.iph->tos;
1000 sk->priority = skb->priority;
1001 sk->protocol = skb->nh.iph->protocol;
1002 ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT);
1009 * IP protocol layer initialiser
1012 static struct packet_type ip_packet_type =
1014 __constant_htons(ETH_P_IP),
1015 NULL, /* All devices */
1022 * IP registers the packet type and then calls the subprotocol initialisers
1025 void __init ip_init(void)
1027 dev_add_pack(&ip_packet_type);
1032 #ifdef CONFIG_IP_MULTICAST
1033 proc_net_create("igmp", 0, ip_mc_procinfo);
1035 proc_net_create("mcfilter", 0, ip_mcf_procinfo);