2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
55 #include <linux/sysctl.h>
57 #include <linux/kmod.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
// Static ipv4_devconf instance. devinet_init_net() uses it directly as the
// all configuration of init_net and as the kmemdup() template for every other
// namespace. Entries are indexed by IPV4_DEVCONF_xxx - 1; the visible ones
// default to 1.
67 static struct ipv4_devconf ipv4_devconf = {
69 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
70 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
71 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
72 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
// Static default configuration. devinet_init_net() uses it as the default
// config of init_net and as the kmemdup() template elsewhere; inetdev_init()
// copies the per-namespace default into each new in_device. Compared with
// ipv4_devconf it additionally enables ACCEPT_SOURCE_ROUTE.
76 static struct ipv4_devconf ipv4_devconf_dflt = {
78 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
79 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
80 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
81 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
82 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
// Accessor for one attribute of the default configuration that belongs to the
// given network namespace (net->ipv4.devconf_dflt). It is used as an lvalue
// in this file.
86 #define IPV4_DEVCONF_DFLT(net, attr) \
87 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
// Netlink attribute policy handed to nlmsg_parse() by the RTM_NEWADDR and
// RTM_DELADDR handlers: the three address attributes are 32-bit values and
// the label is a string of at most IFNAMSIZ - 1 characters.
89 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90 [IFA_LOCAL] = { .type = NLA_U32 },
91 [IFA_ADDRESS] = { .type = NLA_U32 },
92 [IFA_BROADCAST] = { .type = NLA_U32 },
93 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
// Forward declarations for helpers defined later in this file, plus the
// blocking notifier chain on which address add and delete events are
// published (NETDEV_UP and NETDEV_DOWN carrying the in_ifaddr).
96 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
98 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
99 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
102 static void devinet_sysctl_register(struct in_device *idev);
103 static void devinet_sysctl_unregister(struct in_device *idev);
// NOTE(review): the two static inline variants below look like the stubs used
// when sysctl support is compiled out; the guarding preprocessor lines are
// not visible in this view, confirm against the full file.
105 static inline void devinet_sysctl_register(struct in_device *idev)
108 static inline void devinet_sysctl_unregister(struct in_device *idev)
113 /* Locks all the inet devices. */
// NOTE(review): the comment above does not describe the function below.
// inet_alloc_ifa() allocates one zero-filled struct in_ifaddr with GFP_KERNEL
// and returns the kzalloc() result unchanged, so a failed allocation reaches
// the caller as NULL.
115 static struct in_ifaddr *inet_alloc_ifa(void)
117 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
// RCU callback queued by inet_free_ifa(). Recovers the in_ifaddr from its
// embedded rcu_head and drops the reference it holds on its in_device.
// NOTE(review): the statement that releases the in_ifaddr memory itself is
// not visible in this view.
120 static void inet_rcu_free_ifa(struct rcu_head *head)
122 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
124 in_dev_put(ifa->ifa_dev);
// Releases an address object after an RCU grace period by queueing
// inet_rcu_free_ifa() on the rcu_head embedded in the in_ifaddr.
128 static inline void inet_free_ifa(struct in_ifaddr *ifa)
130 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
// Final teardown of an in_device (exported). Warns if the address list or the
// multicast list is still populated and prints a debug line when
// NET_REFCNT_DEBUG is defined.
// NOTE(review): the condition guarding pr_err() and the statements that
// actually release the object are not visible in this view; judging by the
// message, the error is logged when the device is freed while still alive.
133 void in_dev_finish_destroy(struct in_device *idev)
135 struct net_device *dev = idev->dev;
137 WARN_ON(idev->ifa_list);
138 WARN_ON(idev->mc_list);
139 #ifdef NET_REFCNT_DEBUG
140 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
141 idev, dev ? dev->name : "NIL");
145 pr_err("Freeing alive in_device %p\n", idev);
149 EXPORT_SYMBOL(in_dev_finish_destroy);
// Creates and publishes the IPv4 state (struct in_device) for dev.
// The new object starts from the default configuration of the device
// namespace, gets its own ARP neighbour parameters, has its sysctl entries
// and multicast state set up, and is made reachable through dev->ip_ptr only
// as the last step.
// NOTE(review): the allocation-failure handling, the reference counting
// statements announced by the two comments below, and the return statement
// are not visible in this view.
151 static struct in_device *inetdev_init(struct net_device *dev)
153 struct in_device *in_dev;
157 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
160 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
161 sizeof(in_dev->cnf));
// The copied config must not inherit the sysctl table pointer of the template.
162 in_dev->cnf.sysctl = NULL;
164 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
165 if (!in_dev->arp_parms)
// LRO is switched off when forwarding is enabled for this device.
167 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
168 dev_disable_lro(dev);
169 /* Reference in_dev->dev */
171 /* Account for reference dev->ip_ptr (below) */
174 devinet_sysctl_register(in_dev);
175 ip_mc_init_dev(in_dev);
176 if (dev->flags & IFF_UP)
179 /* we can receive as soon as ip_ptr is set -- do this last */
180 rcu_assign_pointer(dev->ip_ptr, in_dev);
// RCU callback queued by inetdev_destroy(). Recovers the in_device from its
// embedded rcu_head.
// NOTE(review): the statement acting on idev is not visible in this view; the
// name suggests it drops a reference, confirm against the full file.
189 static void in_dev_rcu_put(struct rcu_head *head)
191 struct in_device *idev = container_of(head, struct in_device, rcu_head);
// Tears down the IPv4 state of a device: multicast state first, then every
// address still on ifa_list (unlinked through inet_del_ifa() with destroy set
// to 0), then the sysctl entries and the ARP parameters. Finally
// in_dev_rcu_put() is queued through call_rcu().
// NOTE(review): the statements between the visible ones (including what
// happens to each unlinked ifa and to dev->ip_ptr) are not visible here.
195 static void inetdev_destroy(struct in_device *in_dev)
197 struct in_ifaddr *ifa;
198 struct net_device *dev;
206 ip_mc_destroy_dev(in_dev);
// Always removes the current list head until the list is empty.
208 while ((ifa = in_dev->ifa_list) != NULL) {
209 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
215 devinet_sysctl_unregister(in_dev);
216 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
219 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
// Walks the primary addresses of in_dev looking for one that a matches via
// inet_ifa_match() and, unless b is zero, that b matches as well.
// NOTE(review): the return statements are not visible in this view.
222 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
225 for_primary_ifa(in_dev) {
226 if (inet_ifa_match(a, ifa)) {
227 if (!b || inet_ifa_match(b, ifa)) {
232 } endfor_ifa(in_dev);
// Unlinks the address *ifap from in_dev and announces the removal.
//  - nlh and pid are passed through to rtmsg_ifa() for the netlink messages.
//  - When the removed address is a primary, the secondaries that share its
//    mask and match its address are handled as well: they are unlinked and
//    announced as deleted, or, with IN_DEV_PROMOTE_SECONDARIES, one of them
//    can be promoted to primary and re-announced with RTM_NEWADDR.
// NOTE(review): the selection of the address to promote and every use of the
// destroy argument are not visible in this view.
237 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
238 int destroy, struct nlmsghdr *nlh, u32 pid)
240 struct in_ifaddr *promote = NULL;
241 struct in_ifaddr *ifa, *ifa1 = *ifap;
242 struct in_ifaddr *last_prim = in_dev->ifa_list;
243 struct in_ifaddr *prev_prom = NULL;
244 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
248 /* 1. Deleting primary ifaddr forces deletion of all secondaries
249 * unless alias promotion is set
252 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
253 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
255 while ((ifa = *ifap1) != NULL) {
256 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
257 ifa1->ifa_scope <= ifa->ifa_scope)
260 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
261 ifa1->ifa_mask != ifa->ifa_mask ||
262 !inet_ifa_match(ifa1->ifa_address, ifa)) {
263 ifap1 = &ifa->ifa_next;
269 *ifap1 = ifa->ifa_next;
271 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
272 blocking_notifier_call_chain(&inetaddr_chain,
284 *ifap = ifa1->ifa_next;
286 /* 3. Announce address deletion */
288 /* Send message first, then call notifier.
289 At first sight, FIB update triggered by notifier
290 will refer to already deleted ifaddr, that could confuse
291 netlink listeners. It is not true: look, gated sees
292 that route deleted and if it still thinks that ifaddr
293 is valid, it will try to restore deleted routes... Grr.
294 So that, this order is correct.
296 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
297 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
302 prev_prom->ifa_next = promote->ifa_next;
303 promote->ifa_next = last_prim->ifa_next;
304 last_prim->ifa_next = promote;
307 promote->ifa_flags &= ~IFA_F_SECONDARY;
308 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
309 blocking_notifier_call_chain(&inetaddr_chain,
311 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
312 if (ifa1->ifa_mask != ifa->ifa_mask ||
313 !inet_ifa_match(ifa1->ifa_address, ifa))
// Convenience wrapper around __inet_del_ifa() for callers that have no
// netlink request at hand: nlh is NULL and pid is 0.
323 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
326 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
// Inserts ifa into the address list of ifa->ifa_dev and announces it.
// While walking the list the function remembers the slot after the last
// primary address whose ifa_scope value is greater than or equal to that of
// the new address, and it marks the new address IFA_F_SECONDARY when a listed
// address already has the same mask and matches via inet_ifa_match().
// Primaries reseed the network random generator with their local address.
// NOTE(review): the handling of a zero ifa_local, of an exact duplicate and
// of a scope mismatch, the final choice of insertion slot and the return
// value are not visible in this view.
329 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
332 struct in_device *in_dev = ifa->ifa_dev;
333 struct in_ifaddr *ifa1, **ifap, **last_primary;
337 if (!ifa->ifa_local) {
342 ifa->ifa_flags &= ~IFA_F_SECONDARY;
343 last_primary = &in_dev->ifa_list;
345 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
346 ifap = &ifa1->ifa_next) {
347 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
348 ifa->ifa_scope <= ifa1->ifa_scope)
349 last_primary = &ifa1->ifa_next;
350 if (ifa1->ifa_mask == ifa->ifa_mask &&
351 inet_ifa_match(ifa1->ifa_address, ifa)) {
352 if (ifa1->ifa_local == ifa->ifa_local) {
356 if (ifa1->ifa_scope != ifa->ifa_scope) {
360 ifa->ifa_flags |= IFA_F_SECONDARY;
364 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
365 net_srandom(ifa->ifa_local);
369 ifa->ifa_next = *ifap;
372 /* Send message first, then call notifier.
373 Notifier will trigger FIB update, so that
374 listeners of netlink will know about new ifaddr */
375 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
376 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
// Convenience wrapper around __inet_insert_ifa() for callers that have no
// netlink request at hand: nlh is NULL and pid is 0. Returns its result.
381 static int inet_insert_ifa(struct in_ifaddr *ifa)
383 return __inet_insert_ifa(ifa, NULL, 0);
// Attaches ifa to the in_device of dev and inserts it.
// Marks all config attributes of the in_device as set, binds ifa to the
// in_device when it is not bound to it already (warning if it was bound to a
// different one), forces host scope for a loopback local address, and returns
// the result of inet_insert_ifa().
// NOTE(review): the handling of a missing in_device and the reference taken
// when binding are not visible in this view.
386 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
388 struct in_device *in_dev = __in_dev_get_rtnl(dev);
396 ipv4_devconf_setall(in_dev);
397 if (ifa->ifa_dev != in_dev) {
398 WARN_ON(ifa->ifa_dev);
400 ifa->ifa_dev = in_dev;
402 if (ipv4_is_loopback(ifa->ifa_local))
403 ifa->ifa_scope = RT_SCOPE_HOST;
404 return inet_insert_ifa(ifa);
// Looks up the device with the given ifindex in net and returns its
// in_device with a reference taken by in_dev_get(); in_dev starts out as
// NULL (exported).
// NOTE(review): the RCU read-side locking around the lookup and the return
// statement are not visible in this view.
407 struct in_device *inetdev_by_index(struct net *net, int ifindex)
409 struct net_device *dev;
410 struct in_device *in_dev = NULL;
413 dev = dev_get_by_index_rcu(net, ifindex);
415 in_dev = in_dev_get(dev);
419 EXPORT_SYMBOL(inetdev_by_index);
421 /* Called only from RTNL semaphored context. No locks. */
// Searches the primary addresses of in_dev for one whose mask equals mask and
// that prefix matches via inet_ifa_match().
// NOTE(review): the return statements are not visible in this view.
423 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
428 for_primary_ifa(in_dev) {
429 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
431 } endfor_ifa(in_dev);
// RTM_DELADDR handler (registered in devinet_init()). Parses the request
// against ifa_ipv4_policy, finds the in_device for ifm->ifa_index and walks
// its addresses, applying the filters the request supplied: IFA_LOCAL,
// IFA_LABEL, and IFA_ADDRESS together with the prefix length. An address that
// passes is removed through __inet_del_ifa() with destroy set to 1.
// err is set to -EADDRNOTAVAIL after the loop.
// NOTE(review): the continue and return statements are not visible here.
435 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
437 struct net *net = sock_net(skb->sk);
438 struct nlattr *tb[IFA_MAX+1];
439 struct in_device *in_dev;
440 struct ifaddrmsg *ifm;
441 struct in_ifaddr *ifa, **ifap;
446 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
450 ifm = nlmsg_data(nlh);
451 in_dev = inetdev_by_index(net, ifm->ifa_index);
452 if (in_dev == NULL) {
// inetdev_by_index() took a reference; it is dropped again right away.
457 __in_dev_put(in_dev);
459 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
460 ifap = &ifa->ifa_next) {
462 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
465 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
468 if (tb[IFA_ADDRESS] &&
469 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
470 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
473 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
477 err = -EADDRNOTAVAIL;
// Builds a new in_ifaddr from an RTM_NEWADDR request.
// The request is checked for a prefix length of at most 32 and for the
// presence of IFA_LOCAL; IFA_ADDRESS falls back to IFA_LOCAL when absent.
// Prefix length, mask, flags, scope, device, addresses and the optional
// broadcast address are copied into the new object. The label comes either
// from IFA_LABEL or from the device name.
// NOTE(review): the error returns, the condition choosing between the two
// label sources and the final return are not visible in this view.
482 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
484 struct nlattr *tb[IFA_MAX+1];
485 struct in_ifaddr *ifa;
486 struct ifaddrmsg *ifm;
487 struct net_device *dev;
488 struct in_device *in_dev;
491 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
495 ifm = nlmsg_data(nlh);
497 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
500 dev = __dev_get_by_index(net, ifm->ifa_index);
505 in_dev = __in_dev_get_rtnl(dev);
510 ifa = inet_alloc_ifa();
513 * A potential indev allocation can be left alive, it stays
514 * assigned to its device and is destroy with it.
518 ipv4_devconf_setall(in_dev);
521 if (tb[IFA_ADDRESS] == NULL)
522 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
524 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
525 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
526 ifa->ifa_flags = ifm->ifa_flags;
527 ifa->ifa_scope = ifm->ifa_scope;
528 ifa->ifa_dev = in_dev;
530 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
531 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
533 if (tb[IFA_BROADCAST])
534 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
537 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
539 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
// RTM_NEWADDR handler (registered in devinet_init()). Converts the request
// into an in_ifaddr with rtm_to_ifaddr() and inserts it, passing the request
// header and the sender pid on so they appear in the notification.
// NOTE(review): the check of the rtm_to_ifaddr() result is not visible here.
547 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
549 struct net *net = sock_net(skb->sk);
550 struct in_ifaddr *ifa;
554 ifa = rtm_to_ifaddr(net, nlh);
558 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
562 * Determine a default network mask, based on the IP address.
// Classifies the address as zeronet or as class A, B or C (tested on the
// host-order value) and returns a prefix length for it; rc starts at -1 and
// stays there for anything else. devinet_ioctl() treats a negative result as
// an invalid address.
// NOTE(review): the values assigned in each branch are not visible here.
565 static inline int inet_abc_len(__be32 addr)
567 int rc = -1; /* Something else, probably a multicast. */
569 if (ipv4_is_zeronet(addr))
572 __u32 haddr = ntohl(addr);
574 if (IN_CLASSA(haddr))
576 else if (IN_CLASSB(haddr))
578 else if (IN_CLASSC(haddr))
// ioctl entry point for the AF_INET interface-address requests (the SIOCGIF
// getters, the SIOCSIF setters and SIOCKILLADDR).
// Flow, as far as visible: copy a struct ifreq from user space, terminate the
// name, remember the address the caller passed, load the device module by
// name, validate the request (setters need CAP_NET_ADMIN and an AF_INET
// sockaddr), look up the device and its in_device, find the in_ifaddr by
// label plus address or by label alone, then execute the request. The ifreq
// is copied back to user space at the end, giving -EFAULT on failure.
// NOTE(review): locking, the alias colon handling, most error assignments and
// all break, goto and return statements are not visible in this view.
586 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
589 struct sockaddr_in sin_orig;
590 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
591 struct in_device *in_dev;
592 struct in_ifaddr **ifap = NULL;
593 struct in_ifaddr *ifa = NULL;
594 struct net_device *dev;
597 int tryaddrmatch = 0;
600 * Fetch the caller's info block into kernel space
603 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
605 ifr.ifr_name[IFNAMSIZ - 1] = 0;
607 /* save original address for comparison */
608 memcpy(&sin_orig, sin, sizeof(*sin));
610 colon = strchr(ifr.ifr_name, ':');
614 dev_load(net, ifr.ifr_name);
617 case SIOCGIFADDR: /* Get interface address */
618 case SIOCGIFBRDADDR: /* Get the broadcast address */
619 case SIOCGIFDSTADDR: /* Get the destination address */
620 case SIOCGIFNETMASK: /* Get the netmask for the interface */
621 /* Note that these ioctls will not sleep,
622 so that we do not impose a lock.
623 One day we will be forced to put shlock here (I mean SMP)
625 tryaddrmatch = (sin_orig.sin_family == AF_INET);
626 memset(sin, 0, sizeof(*sin));
627 sin->sin_family = AF_INET;
632 if (!capable(CAP_NET_ADMIN))
635 case SIOCSIFADDR: /* Set interface address (and family) */
636 case SIOCSIFBRDADDR: /* Set the broadcast address */
637 case SIOCSIFDSTADDR: /* Set the destination address */
638 case SIOCSIFNETMASK: /* Set the netmask for the interface */
639 case SIOCKILLADDR: /* Nuke all sockets on this address */
641 if (!capable(CAP_NET_ADMIN))
644 if (sin->sin_family != AF_INET)
655 dev = __dev_get_by_name(net, ifr.ifr_name);
662 in_dev = __in_dev_get_rtnl(dev);
665 /* Matthias Andree */
666 /* compare label and address (4.4BSD style) */
667 /* note: we only do this for a limited set of ioctls
668 and only if the original address family was AF_INET.
669 This is checked above. */
670 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
671 ifap = &ifa->ifa_next) {
672 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
673 sin_orig.sin_addr.s_addr ==
679 /* we didn't get a match, maybe the application is
680 4.3BSD-style and passed in junk so we fall back to
681 comparing just the label */
683 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
684 ifap = &ifa->ifa_next)
685 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
690 ret = -EADDRNOTAVAIL;
// Only SIOCSIFADDR, SIOCSIFFLAGS and SIOCKILLADDR are exempt from the test
// for a missing address below.
691 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS
692 && cmd != SIOCKILLADDR)
// Getters: each one copies a single field of the address into the reply.
696 case SIOCGIFADDR: /* Get interface address */
697 sin->sin_addr.s_addr = ifa->ifa_local;
700 case SIOCGIFBRDADDR: /* Get the broadcast address */
701 sin->sin_addr.s_addr = ifa->ifa_broadcast;
704 case SIOCGIFDSTADDR: /* Get the destination address */
705 sin->sin_addr.s_addr = ifa->ifa_address;
708 case SIOCGIFNETMASK: /* Get the netmask for the interface */
709 sin->sin_addr.s_addr = ifa->ifa_mask;
714 ret = -EADDRNOTAVAIL;
// NOTE(review): the case label for this branch is not visible; it deletes the
// address when IFF_UP is cleared, and the device flags are changed at L334.
718 if (!(ifr.ifr_flags & IFF_UP))
719 inet_del_ifa(in_dev, ifap, 1);
722 ret = dev_change_flags(dev, ifr.ifr_flags);
725 case SIOCSIFADDR: /* Set interface address (and family) */
727 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
732 ifa = inet_alloc_ifa();
736 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
738 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
741 if (ifa->ifa_local == sin->sin_addr.s_addr)
743 inet_del_ifa(in_dev, ifap, 0);
744 ifa->ifa_broadcast = 0;
748 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
// Devices that are not point-to-point get a classful prefix from
// inet_abc_len(); the 32-bit mask below presumably belongs to the else branch.
750 if (!(dev->flags & IFF_POINTOPOINT)) {
751 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
752 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
753 if ((dev->flags & IFF_BROADCAST) &&
754 ifa->ifa_prefixlen < 31)
755 ifa->ifa_broadcast = ifa->ifa_address |
758 ifa->ifa_prefixlen = 32;
759 ifa->ifa_mask = inet_make_mask(32);
761 ret = inet_set_ifa(dev, ifa);
// Pattern shared by the setters below: unlink the address without destroying
// it (destroy set to 0), modify the field, then insert it again so that the
// insertion notifications are sent for the updated address.
764 case SIOCSIFBRDADDR: /* Set the broadcast address */
766 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
767 inet_del_ifa(in_dev, ifap, 0);
768 ifa->ifa_broadcast = sin->sin_addr.s_addr;
769 inet_insert_ifa(ifa);
773 case SIOCSIFDSTADDR: /* Set the destination address */
775 if (ifa->ifa_address == sin->sin_addr.s_addr)
778 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
781 inet_del_ifa(in_dev, ifap, 0);
782 ifa->ifa_address = sin->sin_addr.s_addr;
783 inet_insert_ifa(ifa);
786 case SIOCSIFNETMASK: /* Set the netmask for the interface */
789 * The mask we set must be legal.
792 if (bad_mask(sin->sin_addr.s_addr, 0))
795 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
796 __be32 old_mask = ifa->ifa_mask;
797 inet_del_ifa(in_dev, ifap, 0);
798 ifa->ifa_mask = sin->sin_addr.s_addr;
799 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
801 /* See if current broadcast address matches
802 * with current netmask, then recalculate
803 * the broadcast address. Otherwise it's a
804 * funny address, so don't touch it since
805 * the user seems to know what (s)he's doing...
807 if ((dev->flags & IFF_BROADCAST) &&
808 (ifa->ifa_prefixlen < 31) &&
809 (ifa->ifa_broadcast ==
810 (ifa->ifa_local|~old_mask))) {
811 ifa->ifa_broadcast = (ifa->ifa_local |
812 ~sin->sin_addr.s_addr);
814 inet_insert_ifa(ifa);
817 case SIOCKILLADDR: /* Nuke all connections on this address */
819 tcp_v4_nuke_addr(sin->sin_addr.s_addr);
828 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
// Interface-configuration helper registered for PF_INET in devinet_init().
// For each address of the device it fills one struct ifreq (name taken from
// the address label or from the device name, family AF_INET, plus an address)
// and copies it to the user buffer. buf and len advance by sizeof(struct
// ifreq) per entry and done accumulates the number of bytes produced. The
// remaining length is compared against sizeof(ifr) before each entry.
// NOTE(review): the handling of a NULL buf, the choice between the two name
// sources, the error path and the return value are not visible in this view.
832 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
834 struct in_device *in_dev = __in_dev_get_rtnl(dev);
835 struct in_ifaddr *ifa;
842 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
847 if (len < (int) sizeof(ifr))
849 memset(&ifr, 0, sizeof(struct ifreq));
851 strcpy(ifr.ifr_name, ifa->ifa_label);
853 strcpy(ifr.ifr_name, dev->name);
855 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
856 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
859 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
863 buf += sizeof(struct ifreq);
864 len -= sizeof(struct ifreq);
865 done += sizeof(struct ifreq);
// Picks a local address (exported). The primary addresses of dev are examined
// first: the scope of each is compared against scope, and an address is taken
// when dst is zero or matches it via inet_ifa_match(). A second pass walks
// every device of the namespace and takes a primary address that is not of
// link scope and whose scope value does not exceed scope.
// NOTE(review): the RCU locking, the conditions that end each loop and that
// start the second pass, and the return statement are not visible here.
871 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
874 struct in_device *in_dev;
875 struct net *net = dev_net(dev);
878 in_dev = __in_dev_get_rcu(dev);
882 for_primary_ifa(in_dev) {
883 if (ifa->ifa_scope > scope)
885 if (!dst || inet_ifa_match(dst, ifa)) {
886 addr = ifa->ifa_local;
890 addr = ifa->ifa_local;
891 } endfor_ifa(in_dev);
897 /* Not loopback addresses on loopback should be preferred
898 in this case. It is important that lo is the first interface
901 for_each_netdev_rcu(net, dev) {
902 in_dev = __in_dev_get_rcu(dev);
906 for_primary_ifa(in_dev) {
907 if (ifa->ifa_scope != RT_SCOPE_LINK &&
908 ifa->ifa_scope <= scope) {
909 addr = ifa->ifa_local;
912 } endfor_ifa(in_dev);
918 EXPORT_SYMBOL(inet_select_addr);
// Helper for inet_confirm_addr(): scans the addresses of one in_device for a
// local address compatible with dst, local and scope. The same flag records
// whether local and dst (each ignored when zero) match one address entry.
// Returns addr when same ended up set, otherwise 0.
// NOTE(review): the loop header, several conditions and the break statements
// are not visible in this view.
920 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
921 __be32 local, int scope)
928 (local == ifa->ifa_local || !local) &&
929 ifa->ifa_scope <= scope) {
930 addr = ifa->ifa_local;
935 same = (!local || inet_ifa_match(local, ifa)) &&
936 (!dst || inet_ifa_match(dst, ifa));
940 /* Is the selected addr into dst subnet? */
941 if (inet_ifa_match(addr, ifa))
943 /* No, then can we use new local src? */
944 if (ifa->ifa_scope <= scope) {
945 addr = ifa->ifa_local;
948 /* search for large dst subnet for addr */
952 } endfor_ifa(in_dev);
954 return same ? addr : 0;
958 * Confirm that local IP address exists using wildcards:
959 * - in_dev: only on this interface, 0=any interface
960 * - dst: only in the same subnet as dst, 0=any dst
961 * - local: address, 0=autoselect the local address
962 * - scope: maximum allowed scope value for the local address
// Visible behaviour: for any scope other than RT_SCOPE_LINK only the given
// in_dev is checked through confirm_addr_indev(); otherwise every device of
// the namespace of in_dev->dev is tried in turn.
// NOTE(review): the namespace lookup dereferences in_dev, while the list
// above documents 0 as any interface; no guard is visible in this view, so
// confirm against the full file.
964 __be32 inet_confirm_addr(struct in_device *in_dev,
965 __be32 dst, __be32 local, int scope)
968 struct net_device *dev;
971 if (scope != RT_SCOPE_LINK)
972 return confirm_addr_indev(in_dev, dst, local, scope);
974 net = dev_net(in_dev->dev);
976 for_each_netdev_rcu(net, dev) {
977 in_dev = __in_dev_get_rcu(dev);
979 addr = confirm_addr_indev(in_dev, dst, local, scope);
// Adds nb to inetaddr_chain, the chain that receives NETDEV_UP and
// NETDEV_DOWN events for IPv4 addresses in this file. Returns the result of
// the chain registration (exported).
993 int register_inetaddr_notifier(struct notifier_block *nb)
995 return blocking_notifier_chain_register(&inetaddr_chain, nb);
997 EXPORT_SYMBOL(register_inetaddr_notifier);
// Removes nb from inetaddr_chain and returns the result of the chain
// unregistration (exported).
999 int unregister_inetaddr_notifier(struct notifier_block *nb)
1001 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1003 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1005 /* Rename ifa_labels for a device name change. Make some effort to preserve
1006 * existing alias numbering and to create unique labels if possible.
// For each address the old label is saved and the label is overwritten with
// the new device name. The alias suffix (the part of the old label starting
// at the colon, or a generated colon-plus-number suffix) is then appended
// when it fits within IFNAMSIZ, and otherwise written over the tail of the
// label. RTM_NEWADDR is sent for the address afterwards.
// NOTE(review): the conditions around the suffix generation and the counter
// named are not visible in this view.
1008 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1010 struct in_ifaddr *ifa;
1013 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1014 char old[IFNAMSIZ], *dot;
1016 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1017 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1020 dot = strchr(old, ':');
1022 sprintf(old, ":%d", named);
1025 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1026 strcat(ifa->ifa_label, dot);
1028 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1030 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
// MTU predicate consulted by inetdev_event() when a device comes up or
// changes its MTU, to decide whether IP stays enabled on it.
// NOTE(review): the body (and therefore the threshold) is not visible here.
1034 static inline bool inetdev_valid_mtu(unsigned mtu)
1039 /* Called only under RTNL semaphore */
// Netdevice notifier callback, installed through ip_netdev_notifier.
// The first part creates the in_device with inetdev_init() on NETDEV_REGISTER
// (loopback devices also get NOXFRM and NOPOLICY set) and on NETDEV_CHANGEMTU
// with a valid MTU; presumably this happens only while the device has no
// in_device yet. The second part dispatches on the event for devices that
// already have one.
// NOTE(review): several case labels (including the one for the branch that
// sets up the loopback address) and all break and return statements are not
// visible in this view.
1041 static int inetdev_event(struct notifier_block *this, unsigned long event,
1044 struct net_device *dev = ptr;
1045 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1050 if (event == NETDEV_REGISTER) {
1051 in_dev = inetdev_init(dev);
1053 return notifier_from_errno(-ENOMEM);
1054 if (dev->flags & IFF_LOOPBACK) {
1055 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1056 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1058 } else if (event == NETDEV_CHANGEMTU) {
1059 /* Re-enabling IP */
1060 if (inetdev_valid_mtu(dev->mtu))
1061 in_dev = inetdev_init(dev);
1067 case NETDEV_REGISTER:
1068 printk(KERN_DEBUG "inetdev_event: bug\n");
1072 if (!inetdev_valid_mtu(dev->mtu))
// A loopback device gets the INADDR_LOOPBACK address with an 8-bit prefix and
// host scope; the address holds its own reference on the in_device.
1074 if (dev->flags & IFF_LOOPBACK) {
1075 struct in_ifaddr *ifa = inet_alloc_ifa();
1079 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1080 ifa->ifa_prefixlen = 8;
1081 ifa->ifa_mask = inet_make_mask(8);
1082 in_dev_hold(in_dev);
1083 ifa->ifa_dev = in_dev;
1084 ifa->ifa_scope = RT_SCOPE_HOST;
1085 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1086 inet_insert_ifa(ifa);
1091 case NETDEV_NOTIFY_PEERS:
1092 case NETDEV_CHANGEADDR:
1093 /* Send gratuitous ARP to notify of link change */
1094 if (IN_DEV_ARP_NOTIFY(in_dev)) {
1095 struct in_ifaddr *ifa = in_dev->ifa_list;
1098 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1099 ifa->ifa_address, dev,
1100 ifa->ifa_address, NULL,
1101 dev->dev_addr, NULL);
1107 case NETDEV_PRE_TYPE_CHANGE:
1108 ip_mc_unmap(in_dev);
1110 case NETDEV_POST_TYPE_CHANGE:
1111 ip_mc_remap(in_dev);
1113 case NETDEV_CHANGEMTU:
1114 if (inetdev_valid_mtu(dev->mtu))
1116 /* disable IP when MTU is not enough */
1117 case NETDEV_UNREGISTER:
1118 inetdev_destroy(in_dev);
1120 case NETDEV_CHANGENAME:
1121 /* Do not notify about label change, this event is
1122 * not interesting to applications using netlink.
1124 inetdev_changename(dev, in_dev);
// The sysctl entries are registered again so they follow the new device name.
1126 devinet_sysctl_unregister(in_dev);
1127 devinet_sysctl_register(in_dev);
// Notifier block that routes netdevice events to inetdev_event(); it is
// registered in devinet_init().
1134 static struct notifier_block ip_netdev_notifier = {
1135 .notifier_call = inetdev_event,
// Upper bound for the size of one address message built by
// inet_fill_ifaddr(): the aligned ifaddrmsg header plus three 4-byte address
// attributes and a label of IFNAMSIZ bytes. rtmsg_ifa() sizes its skb with it.
1138 static inline size_t inet_nlmsg_size(void)
1140 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1141 + nla_total_size(4) /* IFA_ADDRESS */
1142 + nla_total_size(4) /* IFA_LOCAL */
1143 + nla_total_size(4) /* IFA_BROADCAST */
1144 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
// Serialises one address into a netlink message: an ifaddrmsg header
// (family AF_INET, prefix length, flags, scope, device index) followed by
// the IFA_ADDRESS, IFA_LOCAL, IFA_BROADCAST and IFA_LABEL attributes.
// IFA_F_PERMANENT is always OR-ed into the reported flags. Returns the
// nlmsg_end() result on success; the failure path cancels the partially
// built message.
// NOTE(review): the nlmsg_put() failure check, the condition guarding
// IFA_LOCAL and the error return value are not visible in this view.
1147 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1148 u32 pid, u32 seq, int event, unsigned int flags)
1150 struct ifaddrmsg *ifm;
1151 struct nlmsghdr *nlh;
1153 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1157 ifm = nlmsg_data(nlh);
1158 ifm->ifa_family = AF_INET;
1159 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1160 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1161 ifm->ifa_scope = ifa->ifa_scope;
1162 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1164 if (ifa->ifa_address)
1165 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1168 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1170 if (ifa->ifa_broadcast)
1171 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1173 if (ifa->ifa_label[0])
1174 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1176 return nlmsg_end(skb, nlh);
1179 nlmsg_cancel(skb, nlh);
// RTM_GETADDR dump callback (registered in devinet_init()). Iterates the
// device index hash of the namespace and every address of each device,
// resuming from the position saved in cb->args, and emits one RTM_NEWADDR
// multipart message per address through inet_fill_ifaddr(). A result <= 0
// from inet_fill_ifaddr() ends the pass; the address index is saved back
// into cb->args[2].
// NOTE(review): the RCU locking, the remaining cb->args bookkeeping and the
// return value are not visible in this view.
1183 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1185 struct net *net = sock_net(skb->sk);
1188 int ip_idx, s_ip_idx;
1189 struct net_device *dev;
1190 struct in_device *in_dev;
1191 struct in_ifaddr *ifa;
1192 struct hlist_head *head;
1193 struct hlist_node *node;
1196 s_idx = idx = cb->args[1];
1197 s_ip_idx = ip_idx = cb->args[2];
1199 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1201 head = &net->dev_index_head[h];
1203 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1206 if (h > s_h || idx > s_idx)
1208 in_dev = __in_dev_get_rcu(dev);
1212 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1213 ifa = ifa->ifa_next, ip_idx++) {
1214 if (ip_idx < s_ip_idx)
1216 if (inet_fill_ifaddr(skb, ifa,
1217 NETLINK_CB(cb->skb).pid,
1219 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1233 cb->args[2] = ip_idx;
// Sends an address event to the RTNLGRP_IPV4_IFADDR netlink group.
// Allocates an skb sized by inet_nlmsg_size(), fills it with
// inet_fill_ifaddr() and hands it to rtnl_notify(). The sequence number is
// taken from nlh when a request header is supplied, otherwise it is 0. The
// error path records err on the group with rtnl_set_sk_err().
// NOTE(review): the failure checks and the statements between the visible
// ones are not visible in this view.
1238 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1241 struct sk_buff *skb;
1242 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1246 net = dev_net(ifa->ifa_dev->dev);
1247 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1251 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1253 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1254 WARN_ON(err == -EMSGSIZE);
1258 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1262 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1265 #ifdef CONFIG_SYSCTL
// Propagates entry i of the namespace default configuration to every device
// in net whose state bit i is clear. devinet_conf_proc() sets that bit when
// an attribute is written, so explicitly written per-device values are kept.
// NOTE(review): the RCU locking around the walk is not visible in this view.
1267 static void devinet_copy_dflt_conf(struct net *net, int i)
1269 struct net_device *dev;
1272 for_each_netdev_rcu(net, dev) {
1273 struct in_device *in_dev;
1275 in_dev = __in_dev_get_rcu(dev);
1276 if (in_dev && !test_bit(i, in_dev->cnf.state))
1277 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1282 /* called with RTNL locked */
// Applies a change of the all forwarding setting of a namespace:
// ACCEPT_REDIRECTS of the all config becomes the inverse of the new value,
// the default config FORWARDING follows it, and every device with an
// in_device gets FORWARDING set to it. dev_disable_lro() is called in the
// device loop.
// NOTE(review): the conditions guarding dev_disable_lro() and the per-device
// update are not visible in this view.
1283 static void inet_forward_change(struct net *net)
1285 struct net_device *dev;
1286 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1288 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1289 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1291 for_each_netdev(net, dev) {
1292 struct in_device *in_dev;
1294 dev_disable_lro(dev);
1296 in_dev = __in_dev_get_rcu(dev);
1298 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
// Generic sysctl handler for the per-device config integers. After
// proc_dointvec() it derives the attribute index from the offset of ctl->data
// inside cnf->data, marks the attribute in cnf->state, and, when cnf is the
// default config of the namespace, pushes the value to the devices through
// devinet_copy_dflt_conf().
// NOTE(review): the condition restricting this to writes and the return
// statement are not visible in this view.
1303 static int devinet_conf_proc(ctl_table *ctl, int write,
1304 void __user *buffer,
1305 size_t *lenp, loff_t *ppos)
1307 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1310 struct ipv4_devconf *cnf = ctl->extra1;
1311 struct net *net = ctl->extra2;
1312 int i = (int *)ctl->data - cnf->data;
1314 set_bit(i, cnf->state);
1316 if (cnf == net->ipv4.devconf_dflt)
1317 devinet_copy_dflt_conf(net, i);
// sysctl handler for the forwarding entries. After proc_dointvec() it reacts
// only to writes that changed the value. For every entry except the default
// config one it takes the RTNL lock with rtnl_trylock() and restarts the
// syscall when that fails. A change of the all entry goes through
// inet_forward_change(); the other visible branch disables LRO on the device
// that owns the config. The route cache of the namespace is flushed.
// NOTE(review): the capture of the previous value, the restore before the
// restart, the unlock and the return are not visible in this view.
1323 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1324 void __user *buffer,
1325 size_t *lenp, loff_t *ppos)
1327 int *valp = ctl->data;
1330 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1332 if (write && *valp != val) {
1333 struct net *net = ctl->extra2;
1335 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1336 if (!rtnl_trylock()) {
1337 /* Restore the original values before restarting */
1340 return restart_syscall();
1342 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1343 inet_forward_change(net);
1345 struct ipv4_devconf *cnf = ctl->extra1;
1346 struct in_device *idev =
1347 container_of(cnf, struct in_device, cnf);
1348 dev_disable_lro(idev->dev);
1351 rt_cache_flush(net, 0);
// sysctl handler that stores an integer with proc_dointvec() and flushes the
// route cache of the namespace (taken from ctl->extra2) when a write changed
// the value.
// NOTE(review): the capture of the previous value in val and the return
// statement are not visible in this view.
1358 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1359 void __user *buffer,
1360 size_t *lenp, loff_t *ppos)
1362 int *valp = ctl->data;
1364 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1365 struct net *net = ctl->extra2;
1367 if (write && *valp != val)
1368 rt_cache_flush(net, 0);
// Builders for the entries of the devinet_sysctl template table.
// DEVINET_SYSCTL_ENTRY points .data at the slot of the attribute inside the
// static ipv4_devconf (index IPV4_DEVCONF_attr - 1) and sets .extra1 to that
// config; __devinet_sysctl_register() later rebases both for the real target.
// The wrappers fix the mode and handler: RW is 0644 and RO is 0444, both with
// devinet_conf_proc; COMPLEX is 0644 with a caller-supplied handler; FLUSHING
// is COMPLEX with ipv4_doint_and_flush.
// NOTE(review): some lines of the first macro are not visible in this view.
1373 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
1376 .data = ipv4_devconf.data + \
1377 IPV4_DEVCONF_ ## attr - 1, \
1378 .maxlen = sizeof(int), \
1380 .proc_handler = proc, \
1381 .extra1 = &ipv4_devconf, \
1384 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1385 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
1387 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1388 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
1390 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
1391 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
1393 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1394 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
// Template for the per-config sysctl directory. __devinet_sysctl_register()
// duplicates the whole structure with kmemdup() for every config it
// registers and adjusts the copy, so this instance itself is never
// registered. forwarding has its own handler, mc_forwarding is read-only,
// and the last four entries flush the route cache when changed.
1396 static struct devinet_sysctl_table {
1397 struct ctl_table_header *sysctl_header;
1398 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1400 } devinet_sysctl = {
1402 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1403 devinet_sysctl_forward),
1404 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1406 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1407 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1408 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1409 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1410 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1411 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1412 "accept_source_route"),
1413 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1414 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1415 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1416 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1417 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1418 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1419 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1420 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1421 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1422 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1423 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1424 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1425 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1427 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1428 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1429 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1430 "force_igmp_version"),
1431 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1432 "promote_secondaries"),
// Registers the sysctl directory net, ipv4, conf, dev_name for config p.
// The devinet_sysctl template is duplicated, and each entry of the copy is
// retargeted: .data is shifted by the byte distance between p and the static
// ipv4_devconf, .extra1 becomes p and .extra2 becomes net. dev_name is
// duplicated with kstrdup() and used as the last path component.
// NOTE(review): the allocation-failure checks, the assignment to p->sysctl,
// the unwind path and the return value are not visible in this view.
1436 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1437 struct ipv4_devconf *p)
1440 struct devinet_sysctl_table *t;
1442 #define DEVINET_CTL_PATH_DEV 3
1444 struct ctl_path devinet_ctl_path[] = {
1445 { .procname = "net", },
1446 { .procname = "ipv4", },
1447 { .procname = "conf", },
1448 { /* to be set */ },
1452 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
// The loop stops one slot short of the array size, leaving the final entry
// of the copy untouched.
1456 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1457 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1458 t->devinet_vars[i].extra1 = p;
1459 t->devinet_vars[i].extra2 = net;
1463 * Make a copy of dev_name, because '.procname' is regarded as const
1464 * by sysctl and we wouldn't want anyone to change it under our feet
1465 * (see SIOCSIFNAME).
1467 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1471 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1473 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1475 if (!t->sysctl_header)
// Removes the sysctl directory of a config: the table is found through
// cnf->sysctl and its header is unregistered.
// NOTE(review): the NULL check on t, the reset of cnf->sysctl and the
// release of the table memory are not visible in this view.
1489 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1491 struct devinet_sysctl_table *t = cnf->sysctl;
1497 unregister_sysctl_table(t->sysctl_header);
// Registers both sysctl sets of a device: the ARP neighbour parameters under
// ipv4 and the per-device config directory named after the device.
// NOTE(review): the last argument of the second call is not visible here.
1502 static void devinet_sysctl_register(struct in_device *idev)
1504 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1505 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
// Counterpart of devinet_sysctl_register(): removes the per-device config
// directory first and the neighbour parameter entries second.
1509 static void devinet_sysctl_unregister(struct in_device *idev)
1511 __devinet_sysctl_unregister(&idev->cnf);
1512 neigh_sysctl_unregister(idev->arp_parms);
// Table for the ip_forward sysctl entry. It aliases the FORWARDING slot of
// the static ipv4_devconf and uses devinet_sysctl_forward() as handler.
// devinet_init_net() registers it as is for init_net and registers an
// adjusted kmemdup() copy for every other namespace.
1515 static struct ctl_table ctl_forward_entry[] = {
1517 .procname = "ip_forward",
1518 .data = &ipv4_devconf.data[
1519 IPV4_DEVCONF_FORWARDING - 1],
1520 .maxlen = sizeof(int),
1522 .proc_handler = devinet_sysctl_forward,
1523 .extra1 = &ipv4_devconf,
1524 .extra2 = &init_net,
// sysctl path (net, ipv4) under which devinet_init_net() registers the
// ip_forward table.
1529 static __net_initdata struct ctl_path net_ipv4_path[] = {
1530 { .procname = "net", },
1531 { .procname = "ipv4", },
// Per-namespace initialisation. init_net uses the static ipv4_devconf and
// ipv4_devconf_dflt objects and the static ctl_forward_entry table; every
// other namespace gets kmemdup() copies, with the forwarding table entry
// pointed at its own all config and namespace. With CONFIG_SYSCTL the all
// and default directories and the ip_forward entry are registered. The
// configs are then stored in net->ipv4. The tail of the function is the
// unwind path, which releases only what was duplicated or registered.
// NOTE(review): the labels, the failure checks and the release statements
// themselves are not visible in this view.
1536 static __net_init int devinet_init_net(struct net *net)
1539 struct ipv4_devconf *all, *dflt;
1540 #ifdef CONFIG_SYSCTL
1541 struct ctl_table *tbl = ctl_forward_entry;
1542 struct ctl_table_header *forw_hdr;
1546 all = &ipv4_devconf;
1547 dflt = &ipv4_devconf_dflt;
1549 if (!net_eq(net, &init_net)) {
1550 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1554 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1556 goto err_alloc_dflt;
1558 #ifdef CONFIG_SYSCTL
1559 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1563 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1564 tbl[0].extra1 = all;
1565 tbl[0].extra2 = net;
1569 #ifdef CONFIG_SYSCTL
1570 err = __devinet_sysctl_register(net, "all", all);
1574 err = __devinet_sysctl_register(net, "default", dflt);
1579 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1580 if (forw_hdr == NULL)
1582 net->ipv4.forw_hdr = forw_hdr;
1585 net->ipv4.devconf_all = all;
1586 net->ipv4.devconf_dflt = dflt;
1589 #ifdef CONFIG_SYSCTL
1591 __devinet_sysctl_unregister(dflt);
1593 __devinet_sysctl_unregister(all);
// The comparisons against the static objects keep the unwind path from
// touching anything that was not duplicated for this namespace.
1595 if (tbl != ctl_forward_entry)
1599 if (dflt != &ipv4_devconf_dflt)
1602 if (all != &ipv4_devconf)
// Per-namespace teardown. With CONFIG_SYSCTL the forwarding table pointer is
// read from the registered header before that header is unregistered, then
// the default and all directories are removed. Both config objects are
// passed to kfree() at the end.
// NOTE(review): the release of tbl is not visible in this view.
1608 static __net_exit void devinet_exit_net(struct net *net)
1610 #ifdef CONFIG_SYSCTL
1611 struct ctl_table *tbl;
1613 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1614 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1615 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1616 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1619 kfree(net->ipv4.devconf_dflt);
1620 kfree(net->ipv4.devconf_all);
// Per-namespace hooks for this file; registered in devinet_init().
1623 static __net_initdata struct pernet_operations devinet_ops = {
1624 .init = devinet_init_net,
1625 .exit = devinet_exit_net,
// One-time initialisation: registers the per-namespace operations, the
// PF_INET gifconf helper, the netdevice notifier, and the three rtnetlink
// handlers (new and delete as doit handlers, get as a dump handler).
1628 void __init devinet_init(void)
1630 register_pernet_subsys(&devinet_ops);
1632 register_gifconf(PF_INET, inet_gifconf);
1633 register_netdevice_notifier(&ip_netdev_notifier);
1635 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1636 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1637 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);