OSDN Git Service

Merge remote branch 'stable/linux-2.6.35.y' into android-2.6.35
[android-x86/kernel.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/tcp.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66
67 static struct ipv4_devconf ipv4_devconf = {
68         .data = {
69                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
70                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
71                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
73         },
74 };
75
76 static struct ipv4_devconf ipv4_devconf_dflt = {
77         .data = {
78                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
79                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
80                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
81                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
82                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83         },
84 };
85
/*
 * Read attribute @attr from the default device configuration that the
 * network namespace @net points to (net->ipv4.devconf_dflt).
 *
 * @net is parenthesized so that any pointer-valued expression, not only
 * a plain identifier or function call, can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88
89 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90         [IFA_LOCAL]             = { .type = NLA_U32 },
91         [IFA_ADDRESS]           = { .type = NLA_U32 },
92         [IFA_BROADCAST]         = { .type = NLA_U32 },
93         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
94 };
95
96 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
97
98 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
99 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
100                          int destroy);
101 #ifdef CONFIG_SYSCTL
102 static void devinet_sysctl_register(struct in_device *idev);
103 static void devinet_sysctl_unregister(struct in_device *idev);
104 #else
105 static inline void devinet_sysctl_register(struct in_device *idev)
106 {
107 }
108 static inline void devinet_sysctl_unregister(struct in_device *idev)
109 {
110 }
111 #endif
112
113 /* Locks all the inet devices. */
114
115 static struct in_ifaddr *inet_alloc_ifa(void)
116 {
117         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
118 }
119
120 static void inet_rcu_free_ifa(struct rcu_head *head)
121 {
122         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
123         if (ifa->ifa_dev)
124                 in_dev_put(ifa->ifa_dev);
125         kfree(ifa);
126 }
127
128 static inline void inet_free_ifa(struct in_ifaddr *ifa)
129 {
130         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
131 }
132
133 void in_dev_finish_destroy(struct in_device *idev)
134 {
135         struct net_device *dev = idev->dev;
136
137         WARN_ON(idev->ifa_list);
138         WARN_ON(idev->mc_list);
139 #ifdef NET_REFCNT_DEBUG
140         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
141                idev, dev ? dev->name : "NIL");
142 #endif
143         dev_put(dev);
144         if (!idev->dead)
145                 pr_err("Freeing alive in_device %p\n", idev);
146         else
147                 kfree(idev);
148 }
149 EXPORT_SYMBOL(in_dev_finish_destroy);
150
151 static struct in_device *inetdev_init(struct net_device *dev)
152 {
153         struct in_device *in_dev;
154
155         ASSERT_RTNL();
156
157         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
158         if (!in_dev)
159                 goto out;
160         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
161                         sizeof(in_dev->cnf));
162         in_dev->cnf.sysctl = NULL;
163         in_dev->dev = dev;
164         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
165         if (!in_dev->arp_parms)
166                 goto out_kfree;
167         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
168                 dev_disable_lro(dev);
169         /* Reference in_dev->dev */
170         dev_hold(dev);
171         /* Account for reference dev->ip_ptr (below) */
172         in_dev_hold(in_dev);
173
174         devinet_sysctl_register(in_dev);
175         ip_mc_init_dev(in_dev);
176         if (dev->flags & IFF_UP)
177                 ip_mc_up(in_dev);
178
179         /* we can receive as soon as ip_ptr is set -- do this last */
180         rcu_assign_pointer(dev->ip_ptr, in_dev);
181 out:
182         return in_dev;
183 out_kfree:
184         kfree(in_dev);
185         in_dev = NULL;
186         goto out;
187 }
188
189 static void in_dev_rcu_put(struct rcu_head *head)
190 {
191         struct in_device *idev = container_of(head, struct in_device, rcu_head);
192         in_dev_put(idev);
193 }
194
195 static void inetdev_destroy(struct in_device *in_dev)
196 {
197         struct in_ifaddr *ifa;
198         struct net_device *dev;
199
200         ASSERT_RTNL();
201
202         dev = in_dev->dev;
203
204         in_dev->dead = 1;
205
206         ip_mc_destroy_dev(in_dev);
207
208         while ((ifa = in_dev->ifa_list) != NULL) {
209                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
210                 inet_free_ifa(ifa);
211         }
212
213         dev->ip_ptr = NULL;
214
215         devinet_sysctl_unregister(in_dev);
216         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
217         arp_ifdown(dev);
218
219         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
220 }
221
222 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
223 {
224         rcu_read_lock();
225         for_primary_ifa(in_dev) {
226                 if (inet_ifa_match(a, ifa)) {
227                         if (!b || inet_ifa_match(b, ifa)) {
228                                 rcu_read_unlock();
229                                 return 1;
230                         }
231                 }
232         } endfor_ifa(in_dev);
233         rcu_read_unlock();
234         return 0;
235 }
236
237 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
238                          int destroy, struct nlmsghdr *nlh, u32 pid)
239 {
240         struct in_ifaddr *promote = NULL;
241         struct in_ifaddr *ifa, *ifa1 = *ifap;
242         struct in_ifaddr *last_prim = in_dev->ifa_list;
243         struct in_ifaddr *prev_prom = NULL;
244         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
245
246         ASSERT_RTNL();
247
248         /* 1. Deleting primary ifaddr forces deletion all secondaries
249          * unless alias promotion is set
250          **/
251
252         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
253                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
254
255                 while ((ifa = *ifap1) != NULL) {
256                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
257                             ifa1->ifa_scope <= ifa->ifa_scope)
258                                 last_prim = ifa;
259
260                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
261                             ifa1->ifa_mask != ifa->ifa_mask ||
262                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
263                                 ifap1 = &ifa->ifa_next;
264                                 prev_prom = ifa;
265                                 continue;
266                         }
267
268                         if (!do_promote) {
269                                 *ifap1 = ifa->ifa_next;
270
271                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
272                                 blocking_notifier_call_chain(&inetaddr_chain,
273                                                 NETDEV_DOWN, ifa);
274                                 inet_free_ifa(ifa);
275                         } else {
276                                 promote = ifa;
277                                 break;
278                         }
279                 }
280         }
281
282         /* 2. Unlink it */
283
284         *ifap = ifa1->ifa_next;
285
286         /* 3. Announce address deletion */
287
288         /* Send message first, then call notifier.
289            At first sight, FIB update triggered by notifier
290            will refer to already deleted ifaddr, that could confuse
291            netlink listeners. It is not true: look, gated sees
292            that route deleted and if it still thinks that ifaddr
293            is valid, it will try to restore deleted routes... Grr.
294            So that, this order is correct.
295          */
296         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
297         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
298
299         if (promote) {
300
301                 if (prev_prom) {
302                         prev_prom->ifa_next = promote->ifa_next;
303                         promote->ifa_next = last_prim->ifa_next;
304                         last_prim->ifa_next = promote;
305                 }
306
307                 promote->ifa_flags &= ~IFA_F_SECONDARY;
308                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
309                 blocking_notifier_call_chain(&inetaddr_chain,
310                                 NETDEV_UP, promote);
311                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
312                         if (ifa1->ifa_mask != ifa->ifa_mask ||
313                             !inet_ifa_match(ifa1->ifa_address, ifa))
314                                         continue;
315                         fib_add_ifaddr(ifa);
316                 }
317
318         }
319         if (destroy)
320                 inet_free_ifa(ifa1);
321 }
322
323 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
324                          int destroy)
325 {
326         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
327 }
328
329 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
330                              u32 pid)
331 {
332         struct in_device *in_dev = ifa->ifa_dev;
333         struct in_ifaddr *ifa1, **ifap, **last_primary;
334
335         ASSERT_RTNL();
336
337         if (!ifa->ifa_local) {
338                 inet_free_ifa(ifa);
339                 return 0;
340         }
341
342         ifa->ifa_flags &= ~IFA_F_SECONDARY;
343         last_primary = &in_dev->ifa_list;
344
345         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
346              ifap = &ifa1->ifa_next) {
347                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
348                     ifa->ifa_scope <= ifa1->ifa_scope)
349                         last_primary = &ifa1->ifa_next;
350                 if (ifa1->ifa_mask == ifa->ifa_mask &&
351                     inet_ifa_match(ifa1->ifa_address, ifa)) {
352                         if (ifa1->ifa_local == ifa->ifa_local) {
353                                 inet_free_ifa(ifa);
354                                 return -EEXIST;
355                         }
356                         if (ifa1->ifa_scope != ifa->ifa_scope) {
357                                 inet_free_ifa(ifa);
358                                 return -EINVAL;
359                         }
360                         ifa->ifa_flags |= IFA_F_SECONDARY;
361                 }
362         }
363
364         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
365                 net_srandom(ifa->ifa_local);
366                 ifap = last_primary;
367         }
368
369         ifa->ifa_next = *ifap;
370         *ifap = ifa;
371
372         /* Send message first, then call notifier.
373            Notifier will trigger FIB update, so that
374            listeners of netlink will know about new ifaddr */
375         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
376         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
377
378         return 0;
379 }
380
381 static int inet_insert_ifa(struct in_ifaddr *ifa)
382 {
383         return __inet_insert_ifa(ifa, NULL, 0);
384 }
385
386 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
387 {
388         struct in_device *in_dev = __in_dev_get_rtnl(dev);
389
390         ASSERT_RTNL();
391
392         if (!in_dev) {
393                 inet_free_ifa(ifa);
394                 return -ENOBUFS;
395         }
396         ipv4_devconf_setall(in_dev);
397         if (ifa->ifa_dev != in_dev) {
398                 WARN_ON(ifa->ifa_dev);
399                 in_dev_hold(in_dev);
400                 ifa->ifa_dev = in_dev;
401         }
402         if (ipv4_is_loopback(ifa->ifa_local))
403                 ifa->ifa_scope = RT_SCOPE_HOST;
404         return inet_insert_ifa(ifa);
405 }
406
407 struct in_device *inetdev_by_index(struct net *net, int ifindex)
408 {
409         struct net_device *dev;
410         struct in_device *in_dev = NULL;
411
412         rcu_read_lock();
413         dev = dev_get_by_index_rcu(net, ifindex);
414         if (dev)
415                 in_dev = in_dev_get(dev);
416         rcu_read_unlock();
417         return in_dev;
418 }
419 EXPORT_SYMBOL(inetdev_by_index);
420
421 /* Called only from RTNL semaphored context. No locks. */
422
423 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
424                                     __be32 mask)
425 {
426         ASSERT_RTNL();
427
428         for_primary_ifa(in_dev) {
429                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
430                         return ifa;
431         } endfor_ifa(in_dev);
432         return NULL;
433 }
434
435 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
436 {
437         struct net *net = sock_net(skb->sk);
438         struct nlattr *tb[IFA_MAX+1];
439         struct in_device *in_dev;
440         struct ifaddrmsg *ifm;
441         struct in_ifaddr *ifa, **ifap;
442         int err = -EINVAL;
443
444         ASSERT_RTNL();
445
446         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
447         if (err < 0)
448                 goto errout;
449
450         ifm = nlmsg_data(nlh);
451         in_dev = inetdev_by_index(net, ifm->ifa_index);
452         if (in_dev == NULL) {
453                 err = -ENODEV;
454                 goto errout;
455         }
456
457         __in_dev_put(in_dev);
458
459         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
460              ifap = &ifa->ifa_next) {
461                 if (tb[IFA_LOCAL] &&
462                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
463                         continue;
464
465                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
466                         continue;
467
468                 if (tb[IFA_ADDRESS] &&
469                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
470                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
471                         continue;
472
473                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
474                 return 0;
475         }
476
477         err = -EADDRNOTAVAIL;
478 errout:
479         return err;
480 }
481
482 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
483 {
484         struct nlattr *tb[IFA_MAX+1];
485         struct in_ifaddr *ifa;
486         struct ifaddrmsg *ifm;
487         struct net_device *dev;
488         struct in_device *in_dev;
489         int err;
490
491         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
492         if (err < 0)
493                 goto errout;
494
495         ifm = nlmsg_data(nlh);
496         err = -EINVAL;
497         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
498                 goto errout;
499
500         dev = __dev_get_by_index(net, ifm->ifa_index);
501         err = -ENODEV;
502         if (dev == NULL)
503                 goto errout;
504
505         in_dev = __in_dev_get_rtnl(dev);
506         err = -ENOBUFS;
507         if (in_dev == NULL)
508                 goto errout;
509
510         ifa = inet_alloc_ifa();
511         if (ifa == NULL)
512                 /*
513                  * A potential indev allocation can be left alive, it stays
514                  * assigned to its device and is destroy with it.
515                  */
516                 goto errout;
517
518         ipv4_devconf_setall(in_dev);
519         in_dev_hold(in_dev);
520
521         if (tb[IFA_ADDRESS] == NULL)
522                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
523
524         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
525         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
526         ifa->ifa_flags = ifm->ifa_flags;
527         ifa->ifa_scope = ifm->ifa_scope;
528         ifa->ifa_dev = in_dev;
529
530         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
531         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
532
533         if (tb[IFA_BROADCAST])
534                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
535
536         if (tb[IFA_LABEL])
537                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
538         else
539                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
540
541         return ifa;
542
543 errout:
544         return ERR_PTR(err);
545 }
546
547 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
548 {
549         struct net *net = sock_net(skb->sk);
550         struct in_ifaddr *ifa;
551
552         ASSERT_RTNL();
553
554         ifa = rtm_to_ifaddr(net, nlh);
555         if (IS_ERR(ifa))
556                 return PTR_ERR(ifa);
557
558         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
559 }
560
561 /*
562  *      Determine a default network mask, based on the IP address.
563  */
564
565 static inline int inet_abc_len(__be32 addr)
566 {
567         int rc = -1;    /* Something else, probably a multicast. */
568
569         if (ipv4_is_zeronet(addr))
570                 rc = 0;
571         else {
572                 __u32 haddr = ntohl(addr);
573
574                 if (IN_CLASSA(haddr))
575                         rc = 8;
576                 else if (IN_CLASSB(haddr))
577                         rc = 16;
578                 else if (IN_CLASSC(haddr))
579                         rc = 24;
580         }
581
582         return rc;
583 }
584
585
586 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
587 {
588         struct ifreq ifr;
589         struct sockaddr_in sin_orig;
590         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
591         struct in_device *in_dev;
592         struct in_ifaddr **ifap = NULL;
593         struct in_ifaddr *ifa = NULL;
594         struct net_device *dev;
595         char *colon;
596         int ret = -EFAULT;
597         int tryaddrmatch = 0;
598
599         /*
600          *      Fetch the caller's info block into kernel space
601          */
602
603         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
604                 goto out;
605         ifr.ifr_name[IFNAMSIZ - 1] = 0;
606
607         /* save original address for comparison */
608         memcpy(&sin_orig, sin, sizeof(*sin));
609
610         colon = strchr(ifr.ifr_name, ':');
611         if (colon)
612                 *colon = 0;
613
614         dev_load(net, ifr.ifr_name);
615
616         switch (cmd) {
617         case SIOCGIFADDR:       /* Get interface address */
618         case SIOCGIFBRDADDR:    /* Get the broadcast address */
619         case SIOCGIFDSTADDR:    /* Get the destination address */
620         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
621                 /* Note that these ioctls will not sleep,
622                    so that we do not impose a lock.
623                    One day we will be forced to put shlock here (I mean SMP)
624                  */
625                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
626                 memset(sin, 0, sizeof(*sin));
627                 sin->sin_family = AF_INET;
628                 break;
629
630         case SIOCSIFFLAGS:
631                 ret = -EACCES;
632                 if (!capable(CAP_NET_ADMIN))
633                         goto out;
634                 break;
635         case SIOCSIFADDR:       /* Set interface address (and family) */
636         case SIOCSIFBRDADDR:    /* Set the broadcast address */
637         case SIOCSIFDSTADDR:    /* Set the destination address */
638         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
639         case SIOCKILLADDR:      /* Nuke all sockets on this address */
640                 ret = -EACCES;
641                 if (!capable(CAP_NET_ADMIN))
642                         goto out;
643                 ret = -EINVAL;
644                 if (sin->sin_family != AF_INET)
645                         goto out;
646                 break;
647         default:
648                 ret = -EINVAL;
649                 goto out;
650         }
651
652         rtnl_lock();
653
654         ret = -ENODEV;
655         dev = __dev_get_by_name(net, ifr.ifr_name);
656         if (!dev)
657                 goto done;
658
659         if (colon)
660                 *colon = ':';
661
662         in_dev = __in_dev_get_rtnl(dev);
663         if (in_dev) {
664                 if (tryaddrmatch) {
665                         /* Matthias Andree */
666                         /* compare label and address (4.4BSD style) */
667                         /* note: we only do this for a limited set of ioctls
668                            and only if the original address family was AF_INET.
669                            This is checked above. */
670                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
671                              ifap = &ifa->ifa_next) {
672                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
673                                     sin_orig.sin_addr.s_addr ==
674                                                         ifa->ifa_address) {
675                                         break; /* found */
676                                 }
677                         }
678                 }
679                 /* we didn't get a match, maybe the application is
680                    4.3BSD-style and passed in junk so we fall back to
681                    comparing just the label */
682                 if (!ifa) {
683                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
684                              ifap = &ifa->ifa_next)
685                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
686                                         break;
687                 }
688         }
689
690         ret = -EADDRNOTAVAIL;
691         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS
692             && cmd != SIOCKILLADDR)
693                 goto done;
694
695         switch (cmd) {
696         case SIOCGIFADDR:       /* Get interface address */
697                 sin->sin_addr.s_addr = ifa->ifa_local;
698                 goto rarok;
699
700         case SIOCGIFBRDADDR:    /* Get the broadcast address */
701                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
702                 goto rarok;
703
704         case SIOCGIFDSTADDR:    /* Get the destination address */
705                 sin->sin_addr.s_addr = ifa->ifa_address;
706                 goto rarok;
707
708         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
709                 sin->sin_addr.s_addr = ifa->ifa_mask;
710                 goto rarok;
711
712         case SIOCSIFFLAGS:
713                 if (colon) {
714                         ret = -EADDRNOTAVAIL;
715                         if (!ifa)
716                                 break;
717                         ret = 0;
718                         if (!(ifr.ifr_flags & IFF_UP))
719                                 inet_del_ifa(in_dev, ifap, 1);
720                         break;
721                 }
722                 ret = dev_change_flags(dev, ifr.ifr_flags);
723                 break;
724
725         case SIOCSIFADDR:       /* Set interface address (and family) */
726                 ret = -EINVAL;
727                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
728                         break;
729
730                 if (!ifa) {
731                         ret = -ENOBUFS;
732                         ifa = inet_alloc_ifa();
733                         if (!ifa)
734                                 break;
735                         if (colon)
736                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
737                         else
738                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
739                 } else {
740                         ret = 0;
741                         if (ifa->ifa_local == sin->sin_addr.s_addr)
742                                 break;
743                         inet_del_ifa(in_dev, ifap, 0);
744                         ifa->ifa_broadcast = 0;
745                         ifa->ifa_scope = 0;
746                 }
747
748                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
749
750                 if (!(dev->flags & IFF_POINTOPOINT)) {
751                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
752                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
753                         if ((dev->flags & IFF_BROADCAST) &&
754                             ifa->ifa_prefixlen < 31)
755                                 ifa->ifa_broadcast = ifa->ifa_address |
756                                                      ~ifa->ifa_mask;
757                 } else {
758                         ifa->ifa_prefixlen = 32;
759                         ifa->ifa_mask = inet_make_mask(32);
760                 }
761                 ret = inet_set_ifa(dev, ifa);
762                 break;
763
764         case SIOCSIFBRDADDR:    /* Set the broadcast address */
765                 ret = 0;
766                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
767                         inet_del_ifa(in_dev, ifap, 0);
768                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
769                         inet_insert_ifa(ifa);
770                 }
771                 break;
772
773         case SIOCSIFDSTADDR:    /* Set the destination address */
774                 ret = 0;
775                 if (ifa->ifa_address == sin->sin_addr.s_addr)
776                         break;
777                 ret = -EINVAL;
778                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
779                         break;
780                 ret = 0;
781                 inet_del_ifa(in_dev, ifap, 0);
782                 ifa->ifa_address = sin->sin_addr.s_addr;
783                 inet_insert_ifa(ifa);
784                 break;
785
786         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
787
788                 /*
789                  *      The mask we set must be legal.
790                  */
791                 ret = -EINVAL;
792                 if (bad_mask(sin->sin_addr.s_addr, 0))
793                         break;
794                 ret = 0;
795                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
796                         __be32 old_mask = ifa->ifa_mask;
797                         inet_del_ifa(in_dev, ifap, 0);
798                         ifa->ifa_mask = sin->sin_addr.s_addr;
799                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
800
801                         /* See if current broadcast address matches
802                          * with current netmask, then recalculate
803                          * the broadcast address. Otherwise it's a
804                          * funny address, so don't touch it since
805                          * the user seems to know what (s)he's doing...
806                          */
807                         if ((dev->flags & IFF_BROADCAST) &&
808                             (ifa->ifa_prefixlen < 31) &&
809                             (ifa->ifa_broadcast ==
810                              (ifa->ifa_local|~old_mask))) {
811                                 ifa->ifa_broadcast = (ifa->ifa_local |
812                                                       ~sin->sin_addr.s_addr);
813                         }
814                         inet_insert_ifa(ifa);
815                 }
816                 break;
817         case SIOCKILLADDR:      /* Nuke all connections on this address */
818                 ret = 0;
819                 tcp_v4_nuke_addr(sin->sin_addr.s_addr);
820                 break;
821         }
822 done:
823         rtnl_unlock();
824 out:
825         return ret;
826 rarok:
827         rtnl_unlock();
828         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
829         goto out;
830 }
831
832 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
833 {
834         struct in_device *in_dev = __in_dev_get_rtnl(dev);
835         struct in_ifaddr *ifa;
836         struct ifreq ifr;
837         int done = 0;
838
839         if (!in_dev)
840                 goto out;
841
842         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
843                 if (!buf) {
844                         done += sizeof(ifr);
845                         continue;
846                 }
847                 if (len < (int) sizeof(ifr))
848                         break;
849                 memset(&ifr, 0, sizeof(struct ifreq));
850                 if (ifa->ifa_label)
851                         strcpy(ifr.ifr_name, ifa->ifa_label);
852                 else
853                         strcpy(ifr.ifr_name, dev->name);
854
855                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
856                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
857                                                                 ifa->ifa_local;
858
859                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
860                         done = -EFAULT;
861                         break;
862                 }
863                 buf  += sizeof(struct ifreq);
864                 len  -= sizeof(struct ifreq);
865                 done += sizeof(struct ifreq);
866         }
867 out:
868         return done;
869 }
870
871 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
872 {
873         __be32 addr = 0;
874         struct in_device *in_dev;
875         struct net *net = dev_net(dev);
876
877         rcu_read_lock();
878         in_dev = __in_dev_get_rcu(dev);
879         if (!in_dev)
880                 goto no_in_dev;
881
882         for_primary_ifa(in_dev) {
883                 if (ifa->ifa_scope > scope)
884                         continue;
885                 if (!dst || inet_ifa_match(dst, ifa)) {
886                         addr = ifa->ifa_local;
887                         break;
888                 }
889                 if (!addr)
890                         addr = ifa->ifa_local;
891         } endfor_ifa(in_dev);
892
893         if (addr)
894                 goto out_unlock;
895 no_in_dev:
896
897         /* Not loopback addresses on loopback should be preferred
898            in this case. It is importnat that lo is the first interface
899            in dev_base list.
900          */
901         for_each_netdev_rcu(net, dev) {
902                 in_dev = __in_dev_get_rcu(dev);
903                 if (!in_dev)
904                         continue;
905
906                 for_primary_ifa(in_dev) {
907                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
908                             ifa->ifa_scope <= scope) {
909                                 addr = ifa->ifa_local;
910                                 goto out_unlock;
911                         }
912                 } endfor_ifa(in_dev);
913         }
914 out_unlock:
915         rcu_read_unlock();
916         return addr;
917 }
918 EXPORT_SYMBOL(inet_select_addr);
919
/*
 * Look through the addresses of @in_dev for a local address that satisfies
 * the @dst / @local / @scope constraints (a zero @dst or @local acts as a
 * wildcard).
 *
 * Two pieces of state are tracked while walking the list:
 *  - addr: the first address whose ifa_local equals @local (or any address
 *    when @local is 0) and whose scope does not exceed @scope;
 *  - same: whether some list entry matched both @local and @dst according
 *    to inet_ifa_match().
 *
 * Returns addr when a "same" entry was found, 0 otherwise.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        int same = 0;
        __be32 addr = 0;

        for_ifa(in_dev) {
                /* Pick the first acceptable local address. */
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    ifa->ifa_scope <= scope) {
                        addr = ifa->ifa_local;
                        /* A matching entry was already seen: done. */
                        if (same)
                                break;
                }
                if (!same) {
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                /*
                                 * With an explicit local address, or no
                                 * dst to care about, addr is final.
                                 */
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (ifa->ifa_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        } endfor_ifa(in_dev);

        return same ? addr : 0;
}
956
957 /*
958  * Confirm that local IP address exists using wildcards:
959  * - in_dev: only on this interface, 0=any interface
960  * - dst: only in the same subnet as dst, 0=any dst
961  * - local: address, 0=autoselect the local address
962  * - scope: maximum allowed scope value for the local address
963  */
964 __be32 inet_confirm_addr(struct in_device *in_dev,
965                          __be32 dst, __be32 local, int scope)
966 {
967         __be32 addr = 0;
968         struct net_device *dev;
969         struct net *net;
970
971         if (scope != RT_SCOPE_LINK)
972                 return confirm_addr_indev(in_dev, dst, local, scope);
973
974         net = dev_net(in_dev->dev);
975         rcu_read_lock();
976         for_each_netdev_rcu(net, dev) {
977                 in_dev = __in_dev_get_rcu(dev);
978                 if (in_dev) {
979                         addr = confirm_addr_indev(in_dev, dst, local, scope);
980                         if (addr)
981                                 break;
982                 }
983         }
984         rcu_read_unlock();
985
986         return addr;
987 }
988
989 /*
990  *      Device notifier
991  */
992
993 int register_inetaddr_notifier(struct notifier_block *nb)
994 {
995         return blocking_notifier_chain_register(&inetaddr_chain, nb);
996 }
997 EXPORT_SYMBOL(register_inetaddr_notifier);
998
999 int unregister_inetaddr_notifier(struct notifier_block *nb)
1000 {
1001         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1002 }
1003 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1004
1005 /* Rename ifa_labels for a device name change. Make some effort to preserve
1006  * existing alias numbering and to create unique labels if possible.
1007 */
1008 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1009 {
1010         struct in_ifaddr *ifa;
1011         int named = 0;
1012
1013         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1014                 char old[IFNAMSIZ], *dot;
1015
1016                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1017                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1018                 if (named++ == 0)
1019                         goto skip;
1020                 dot = strchr(old, ':');
1021                 if (dot == NULL) {
1022                         sprintf(old, ":%d", named);
1023                         dot = old;
1024                 }
1025                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1026                         strcat(ifa->ifa_label, dot);
1027                 else
1028                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1029 skip:
1030                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1031         }
1032 }
1033
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a device:
 * anything below 68 bytes is rejected.
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
        if (mtu < 68)
                return false;

        return true;
}
1038
/*
 * Netdevice notifier callback that keeps the per-device IPv4 state
 * (struct in_device) in step with device events.
 *
 * Called only under RTNL semaphore.
 *
 * @this:  notifier block (not used)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * Returns NOTIFY_DONE, except when creating the in_device for a newly
 * registered device fails; that case reports -ENOMEM through
 * notifier_from_errno().
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = ptr;
        struct in_device *in_dev = __in_dev_get_rtnl(dev);

        ASSERT_RTNL();

        /* No IPv4 state yet: only REGISTER and CHANGEMTU may create it. */
        if (!in_dev) {
                if (event == NETDEV_REGISTER) {
                        in_dev = inetdev_init(dev);
                        if (!in_dev)
                                return notifier_from_errno(-ENOMEM);
                        /* Loopback gets NOXFRM and NOPOLICY switched on. */
                        if (dev->flags & IFF_LOOPBACK) {
                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
                        }
                } else if (event == NETDEV_CHANGEMTU) {
                        /* Re-enabling IP */
                        if (inetdev_valid_mtu(dev->mtu))
                                in_dev = inetdev_init(dev);
                }
                goto out;
        }

        switch (event) {
        case NETDEV_REGISTER:
                /* A registering device should not already have an in_device. */
                printk(KERN_DEBUG "inetdev_event: bug\n");
                dev->ip_ptr = NULL;
                break;
        case NETDEV_UP:
                if (!inetdev_valid_mtu(dev->mtu))
                        break;
                /* Loopback devices get 127.0.0.1/8 configured automatically. */
                if (dev->flags & IFF_LOOPBACK) {
                        struct in_ifaddr *ifa = inet_alloc_ifa();

                        if (ifa) {
                                ifa->ifa_local =
                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
                                ifa->ifa_prefixlen = 8;
                                ifa->ifa_mask = inet_make_mask(8);
                                /* The ifa keeps a reference on in_dev. */
                                in_dev_hold(in_dev);
                                ifa->ifa_dev = in_dev;
                                ifa->ifa_scope = RT_SCOPE_HOST;
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                                inet_insert_ifa(ifa);
                        }
                }
                ip_mc_up(in_dev);
                /* fall through */
        case NETDEV_NOTIFY_PEERS:
        case NETDEV_CHANGEADDR:
                /* Send gratuitous ARP to notify of link change */
                if (IN_DEV_ARP_NOTIFY(in_dev)) {
                        /* Only the first address in the list is announced. */
                        struct in_ifaddr *ifa = in_dev->ifa_list;

                        if (ifa)
                                arp_send(ARPOP_REQUEST, ETH_P_ARP,
                                         ifa->ifa_address, dev,
                                         ifa->ifa_address, NULL,
                                         dev->dev_addr, NULL);
                }
                break;
        case NETDEV_DOWN:
                ip_mc_down(in_dev);
                break;
        case NETDEV_PRE_TYPE_CHANGE:
                ip_mc_unmap(in_dev);
                break;
        case NETDEV_POST_TYPE_CHANGE:
                ip_mc_remap(in_dev);
                break;
        case NETDEV_CHANGEMTU:
                if (inetdev_valid_mtu(dev->mtu))
                        break;
                /* disable IP when MTU is not enough: fall through */
        case NETDEV_UNREGISTER:
                inetdev_destroy(in_dev);
                break;
        case NETDEV_CHANGENAME:
                /* Do not notify about label change, this event is
                 * not interesting to applications using netlink.
                 *
                 * NOTE(review): inetdev_changename() does call
                 * rtmsg_ifa(RTM_NEWADDR, ...) for each address, so the
                 * statement above looks stale - confirm.
                 */
                inetdev_changename(dev, in_dev);

                /* Re-register so the sysctl entries use the new name. */
                devinet_sysctl_unregister(in_dev);
                devinet_sysctl_register(in_dev);
                break;
        }
out:
        return NOTIFY_DONE;
}
1133
/* Notifier block routing netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1137
1138 static inline size_t inet_nlmsg_size(void)
1139 {
1140         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1141                + nla_total_size(4) /* IFA_ADDRESS */
1142                + nla_total_size(4) /* IFA_LOCAL */
1143                + nla_total_size(4) /* IFA_BROADCAST */
1144                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1145 }
1146
/*
 * Append one address message describing @ifa to @skb.
 *
 * @skb:   netlink buffer being filled
 * @ifa:   address to describe
 * @pid:   passed to nlmsg_put() for the message header
 * @seq:   passed to nlmsg_put() for the message header
 * @event: message type passed to nlmsg_put() (callers in this file use
 *         RTM_NEWADDR)
 * @flags: netlink message flags passed to nlmsg_put()
 *
 * The address, local, broadcast and label attributes are only emitted
 * when they are non-zero / non-empty.
 *
 * Returns the result of nlmsg_end() on success, or -EMSGSIZE when the
 * header or an attribute does not fit.
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
                            u32 pid, u32 seq, int event, unsigned int flags)
{
        struct ifaddrmsg *ifm;
        struct nlmsghdr  *nlh;

        nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        ifm = nlmsg_data(nlh);
        ifm->ifa_family = AF_INET;
        ifm->ifa_prefixlen = ifa->ifa_prefixlen;
        /* IFA_F_PERMANENT is always reported in addition to the ifa flags. */
        ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
        ifm->ifa_scope = ifa->ifa_scope;
        ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

        /*
         * The NLA_PUT_* macros are expected to jump to nla_put_failure
         * when the attribute does not fit (the label is otherwise unused).
         */
        if (ifa->ifa_address)
                NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);

        if (ifa->ifa_local)
                NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);

        if (ifa->ifa_broadcast)
                NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);

        if (ifa->ifa_label[0])
                NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);

        return nlmsg_end(skb, nlh);

nla_put_failure:
        /* Drop the partially built message again. */
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
1182
/*
 * Netlink dump callback emitting one RTM_NEWADDR message per IPv4 address
 * of every device in the requesting socket's namespace.
 *
 * The dump position is kept in @cb->args so that the walk can resume on
 * the next call:
 *   args[0] - device index hash bucket
 *   args[1] - position of the device within that bucket
 *   args[2] - position of the address within that device's list
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int h, s_h;
        int idx, s_idx;
        int ip_idx, s_ip_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct in_ifaddr *ifa;
        struct hlist_head *head;
        struct hlist_node *node;

        /* Pick up where the previous call stopped. */
        s_h = cb->args[0];
        s_idx = idx = cb->args[1];
        s_ip_idx = ip_idx = cb->args[2];

        /* After the first bucket, start each bucket at its first device. */
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
                rcu_read_lock();
                hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
                        /* Skip devices already dumped. */
                        if (idx < s_idx)
                                goto cont;
                        /*
                         * The saved address offset only applies to the
                         * device the previous call stopped at.
                         */
                        if (h > s_h || idx > s_idx)
                                s_ip_idx = 0;
                        in_dev = __in_dev_get_rcu(dev);
                        if (!in_dev)
                                goto cont;

                        for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
                             ifa = ifa->ifa_next, ip_idx++) {
                                if (ip_idx < s_ip_idx)
                                        continue;
                                /*
                                 * Stop as soon as a message cannot be
                                 * added; h/idx/ip_idx then record the
                                 * resume point.
                                 */
                                if (inet_fill_ifaddr(skb, ifa,
                                             NETLINK_CB(cb->skb).pid,
                                             cb->nlh->nlmsg_seq,
                                             RTM_NEWADDR, NLM_F_MULTI) <= 0) {
                                        rcu_read_unlock();
                                        goto done;
                                }
                        }
cont:
                        idx++;
                }
                rcu_read_unlock();
        }

done:
        cb->args[0] = h;
        cb->args[1] = idx;
        cb->args[2] = ip_idx;

        return skb->len;
}
1237
1238 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1239                       u32 pid)
1240 {
1241         struct sk_buff *skb;
1242         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1243         int err = -ENOBUFS;
1244         struct net *net;
1245
1246         net = dev_net(ifa->ifa_dev->dev);
1247         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1248         if (skb == NULL)
1249                 goto errout;
1250
1251         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1252         if (err < 0) {
1253                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1254                 WARN_ON(err == -EMSGSIZE);
1255                 kfree_skb(skb);
1256                 goto errout;
1257         }
1258         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1259         return;
1260 errout:
1261         if (err < 0)
1262                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1263 }
1264
1265 #ifdef CONFIG_SYSCTL
1266
1267 static void devinet_copy_dflt_conf(struct net *net, int i)
1268 {
1269         struct net_device *dev;
1270
1271         rcu_read_lock();
1272         for_each_netdev_rcu(net, dev) {
1273                 struct in_device *in_dev;
1274
1275                 in_dev = __in_dev_get_rcu(dev);
1276                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1277                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1278         }
1279         rcu_read_unlock();
1280 }
1281
1282 /* called with RTNL locked */
1283 static void inet_forward_change(struct net *net)
1284 {
1285         struct net_device *dev;
1286         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1287
1288         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1289         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1290
1291         for_each_netdev(net, dev) {
1292                 struct in_device *in_dev;
1293                 if (on)
1294                         dev_disable_lro(dev);
1295                 rcu_read_lock();
1296                 in_dev = __in_dev_get_rcu(dev);
1297                 if (in_dev)
1298                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1299                 rcu_read_unlock();
1300         }
1301 }
1302
1303 static int devinet_conf_proc(ctl_table *ctl, int write,
1304                              void __user *buffer,
1305                              size_t *lenp, loff_t *ppos)
1306 {
1307         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1308
1309         if (write) {
1310                 struct ipv4_devconf *cnf = ctl->extra1;
1311                 struct net *net = ctl->extra2;
1312                 int i = (int *)ctl->data - cnf->data;
1313
1314                 set_bit(i, cnf->state);
1315
1316                 if (cnf == net->ipv4.devconf_dflt)
1317                         devinet_copy_dflt_conf(net, i);
1318         }
1319
1320         return ret;
1321 }
1322
/*
 * sysctl handler for the "forwarding" entries (and ip_forward).
 *
 * The value is read or written with proc_dointvec().  When a write
 * changed it and the entry is not the namespace default, RTNL is taken
 * to propagate the change: the "all" entry goes through
 * inet_forward_change(), while enabling forwarding on a single device
 * disables LRO on it.  The route cache is flushed afterwards.
 *
 * If RTNL cannot be taken right away, the old value and file position are
 * restored and the syscall is restarted.
 *
 * Returns the proc_dointvec() result, or the restart_syscall() result.
 */
static int devinet_sysctl_forward(ctl_table *ctl, int write,
                                  void __user *buffer,
                                  size_t *lenp, loff_t *ppos)
{
        int *valp = ctl->data;
        /* Remember value and position so a restart can undo the write. */
        int val = *valp;
        loff_t pos = *ppos;
        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

        if (write && *valp != val) {
                struct net *net = ctl->extra2;

                /* A change of the default entry needs no propagation. */
                if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
                        if (!rtnl_trylock()) {
                                /* Restore the original values before restarting */
                                *valp = val;
                                *ppos = pos;
                                return restart_syscall();
                        }
                        if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
                                inet_forward_change(net);
                        } else if (*valp) {
                                /*
                                 * Per-device entry: cnf is embedded in the
                                 * in_device, so recover it via container_of.
                                 */
                                struct ipv4_devconf *cnf = ctl->extra1;
                                struct in_device *idev =
                                        container_of(cnf, struct in_device, cnf);
                                dev_disable_lro(idev->dev);
                        }
                        rtnl_unlock();
                        rt_cache_flush(net, 0);
                }
        }

        return ret;
}
1357
1358 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1359                          void __user *buffer,
1360                          size_t *lenp, loff_t *ppos)
1361 {
1362         int *valp = ctl->data;
1363         int val = *valp;
1364         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1365         struct net *net = ctl->extra2;
1366
1367         if (write && *valp != val)
1368                 rt_cache_flush(net, 0);
1369
1370         return ret;
1371 }
1372
/*
 * Build one ctl_table initializer for devconf attribute `attr`.
 * .data points at slot IPV4_DEVCONF_<attr> - 1 of the global
 * ipv4_devconf.data[] and .extra1 at ipv4_devconf itself;
 * __devinet_sysctl_register() rebases both for each registered copy.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Read-write (0644) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1395
/*
 * Template for the per-devconf sysctl tables.  It is never registered
 * itself: __devinet_sysctl_register() duplicates it with kmemdup() and
 * patches the copy.
 *
 * sysctl_header - handle returned when the copy is registered
 * devinet_vars  - one entry per exposed devconf attribute; the
 *                 registration loop skips the last slot, presumably the
 *                 empty terminator - confirm against __IPV4_DEVCONF_MAX
 * dev_name      - private copy of the directory name
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
        char *dev_name;
} devinet_sysctl = {
        .devinet_vars = {
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward),
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

                /* Changing any of these also flushes the route cache. */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
                                              "force_igmp_version"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
        },
};
1435
1436 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1437                                         struct ipv4_devconf *p)
1438 {
1439         int i;
1440         struct devinet_sysctl_table *t;
1441
1442 #define DEVINET_CTL_PATH_DEV    3
1443
1444         struct ctl_path devinet_ctl_path[] = {
1445                 { .procname = "net",  },
1446                 { .procname = "ipv4", },
1447                 { .procname = "conf", },
1448                 { /* to be set */ },
1449                 { },
1450         };
1451
1452         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1453         if (!t)
1454                 goto out;
1455
1456         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1457                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1458                 t->devinet_vars[i].extra1 = p;
1459                 t->devinet_vars[i].extra2 = net;
1460         }
1461
1462         /*
1463          * Make a copy of dev_name, because '.procname' is regarded as const
1464          * by sysctl and we wouldn't want anyone to change it under our feet
1465          * (see SIOCSIFNAME).
1466          */
1467         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1468         if (!t->dev_name)
1469                 goto free;
1470
1471         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1472
1473         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1474                         t->devinet_vars);
1475         if (!t->sysctl_header)
1476                 goto free_procname;
1477
1478         p->sysctl = t;
1479         return 0;
1480
1481 free_procname:
1482         kfree(t->dev_name);
1483 free:
1484         kfree(t);
1485 out:
1486         return -ENOBUFS;
1487 }
1488
1489 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1490 {
1491         struct devinet_sysctl_table *t = cnf->sysctl;
1492
1493         if (t == NULL)
1494                 return;
1495
1496         cnf->sysctl = NULL;
1497         unregister_sysctl_table(t->sysctl_header);
1498         kfree(t->dev_name);
1499         kfree(t);
1500 }
1501
1502 static void devinet_sysctl_register(struct in_device *idev)
1503 {
1504         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1505         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1506                                         &idev->cnf);
1507 }
1508
1509 static void devinet_sysctl_unregister(struct in_device *idev)
1510 {
1511         __devinet_sysctl_unregister(&idev->cnf);
1512         neigh_sysctl_unregister(idev->arp_parms);
1513 }
1514
/*
 * sysctl table for "ip_forward", backed by the FORWARDING slot of the
 * global ipv4_devconf and handled by devinet_sysctl_forward().
 * devinet_init_net() uses it directly for init_net and a patched copy for
 * other namespaces.
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        IPV4_DEVCONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },
};
1528
/* sysctl path "net/ipv4" under which ctl_forward_entry is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
        { .procname = "net", },
        { .procname = "ipv4", },
        { },
};
1534 #endif
1535
/*
 * Per-namespace setup of the "all" and "default" IPv4 device
 * configuration.
 *
 * init_net uses the static ipv4_devconf, ipv4_devconf_dflt and
 * ctl_forward_entry objects directly; any other namespace gets kmemdup()
 * copies of them.  With CONFIG_SYSCTL the conf/all and conf/default
 * directories and the ip_forward entry are registered as well.
 *
 * Returns 0 on success.  On failure everything set up so far is undone
 * and a negative errno is returned: -ENOMEM for allocation or ip_forward
 * registration failures, otherwise the __devinet_sysctl_register() error.
 */
static __net_init int devinet_init_net(struct net *net)
{
        int err;
        struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
        struct ctl_table *tbl = ctl_forward_entry;
        struct ctl_table_header *forw_hdr;
#endif

        err = -ENOMEM;
        all = &ipv4_devconf;
        dflt = &ipv4_devconf_dflt;

        /* Namespaces other than init_net work on private copies. */
        if (!net_eq(net, &init_net)) {
                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
                if (all == NULL)
                        goto err_alloc_all;

                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
                if (dflt == NULL)
                        goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
                if (tbl == NULL)
                        goto err_alloc_ctl;

                /* Point the copied ip_forward entry at this namespace. */
                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
                tbl[0].extra1 = all;
                tbl[0].extra2 = net;
#endif
        }

#ifdef CONFIG_SYSCTL
        err = __devinet_sysctl_register(net, "all", all);
        if (err < 0)
                goto err_reg_all;

        err = __devinet_sysctl_register(net, "default", dflt);
        if (err < 0)
                goto err_reg_dflt;

        err = -ENOMEM;
        forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
        if (forw_hdr == NULL)
                goto err_reg_ctl;
        net->ipv4.forw_hdr = forw_hdr;
#endif

        net->ipv4.devconf_all = all;
        net->ipv4.devconf_dflt = dflt;
        return 0;

        /* Unwind in reverse order; the static objects are never freed. */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
        __devinet_sysctl_unregister(dflt);
err_reg_dflt:
        __devinet_sysctl_unregister(all);
err_reg_all:
        if (tbl != ctl_forward_entry)
                kfree(tbl);
err_alloc_ctl:
#endif
        if (dflt != &ipv4_devconf_dflt)
                kfree(dflt);
err_alloc_dflt:
        if (all != &ipv4_devconf)
                kfree(all);
err_alloc_all:
        return err;
}
1607
1608 static __net_exit void devinet_exit_net(struct net *net)
1609 {
1610 #ifdef CONFIG_SYSCTL
1611         struct ctl_table *tbl;
1612
1613         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1614         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1615         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1616         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1617         kfree(tbl);
1618 #endif
1619         kfree(net->ipv4.devconf_dflt);
1620         kfree(net->ipv4.devconf_all);
1621 }
1622
/* Per-namespace init/exit hooks, registered from devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
        .init = devinet_init_net,
        .exit = devinet_exit_net,
};
1627
/*
 * Boot-time initialisation of IPv4 device support: registers the
 * per-namespace operations, the PF_INET gifconf handler, the netdevice
 * notifier and the rtnetlink address handlers.
 */
void __init devinet_init(void)
{
        register_pernet_subsys(&devinet_ops);

        register_gifconf(PF_INET, inet_gifconf);
        register_netdevice_notifier(&ip_netdev_notifier);

        /* NEWADDR/DELADDR get doit handlers; GETADDR only a dump handler. */
        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
}
1639