2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: FIB frontend.
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
46 #include <net/ip_fib.h>
/* Debug helper: passes its arguments to printk() at KERN_DEBUG level. */
48 #define FFprint(a...) printk(KERN_DEBUG a)
/* Routing table storage; the layout depends on CONFIG_IP_MULTIPLE_TABLES. */
50 #ifndef CONFIG_IP_MULTIPLE_TABLES
/* In this configuration RT_TABLE_MIN is simply RT_TABLE_MAIN. */
52 #define RT_TABLE_MIN RT_TABLE_MAIN
/* The two tables that the rest of this file references by name. */
54 struct fib_table *local_table;
55 struct fib_table *main_table;
/* NOTE(review): presumably the CONFIG_IP_MULTIPLE_TABLES branch -- confirm. */
59 #define RT_TABLE_MIN 1
/* RT_TABLE_MAX+1 table pointers; presumably indexed by table id -- confirm. */
61 struct fib_table *fib_tables[RT_TABLE_MAX+1];
/*
 * Create the FIB table for table id `id`.
 * The table object is obtained from fib_hash_init(id).
 * NOTE(review): how the new table is recorded and what is returned on
 * failure should be confirmed against the complete function.
 */
63 struct fib_table *__fib_new_table(int id)
67 tb = fib_hash_init(id);
75 #endif /* CONFIG_IP_MULTIPLE_TABLES */
/*
 * Flush the FIB tables, accumulating each table's tb_flush() result in
 * `flushed`.
 * With CONFIG_IP_MULTIPLE_TABLES, ids from RT_TABLE_MAX down to 1 are
 * fetched with fib_get_table() and tested for NULL before being flushed.
 * Otherwise only main_table and local_table are flushed.
 */
81 #ifdef CONFIG_IP_MULTIPLE_TABLES
85 for (id = RT_TABLE_MAX; id>0; id--) {
86 if ((tb = fib_get_table(id))==NULL)
88 flushed += tb->tb_flush(tb);
90 #else /* CONFIG_IP_MULTIPLE_TABLES */
91 flushed += main_table->tb_flush(main_table);
92 flushed += local_table->tb_flush(local_table);
93 #endif /* CONFIG_IP_MULTIPLE_TABLES */
100 #ifdef CONFIG_PROC_FS
103 * Called from the PROCfs module. This outputs /proc/net/route.
105 * It always works in backward compatibility mode.
106 * The format of the file is not supposed to be changed.
/*
 * Produce route-table text into `buffer` for the proc "route" entry.
 *
 * The output is handled as a sequence of 128-byte records:
 *   first  - index of the record that `offset` falls into (offset/128);
 *   count  - number of records needed to cover `length` bytes, rounded up;
 *   *start - set to buffer + offset%128, the position inside the first
 *            record that corresponds to `offset`.
 * The column header is formatted as one such record ("%-127s\n": 127
 * padded characters plus a newline).  Route records are requested from
 * main_table->tb_get_info() only when main_table is set and count > 0.
 */
110 fib_get_procinfo(char *buffer, char **start, off_t offset, int length)
112 int first = offset/128;
114 int count = (length+127)/128;
117 *start = buffer + offset%128;
120 sprintf(buffer, "%-127s\n", "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
126 if (main_table && count > 0) {
127 int n = main_table->tb_get_info(main_table, ptr, first, count);
139 #endif /* CONFIG_PROC_FS */
142 * Find the first device with a given source address.
/*
 * Resolve IPv4 address `addr` to a network device.
 * `dev` starts out NULL and the lookup key is zeroed before use.  The
 * visible lookup goes through local_table->tb_lookup(); it is not called
 * when local_table is NULL.  The result type is compared with RTN_LOCAL
 * and the device is taken from FIB_RES_DEV(res).
 * NOTE(review): the CONFIG_IP_MULTIPLE_TABLES-specific statement and the
 * failure/return paths should be confirmed against the complete function.
 */
145 struct net_device * ip_dev_find(u32 addr)
148 struct fib_result res;
149 struct net_device *dev = NULL;
151 memset(&key, 0, sizeof(key));
153 #ifdef CONFIG_IP_MULTIPLE_TABLES
157 if (!local_table || local_table->tb_lookup(local_table, &key, &res)) {
160 if (res.type != RTN_LOCAL)
162 dev = FIB_RES_DEV(res);
/*
 * Classify IPv4 address `addr` as a route type (an RTN_* value).
 * Addresses matching ZERONET() or BADCLASS() are reported as
 * RTN_BROADCAST right away, and a second early exit returns
 * RTN_MULTICAST.  For the remaining addresses the key is zeroed and
 * local_table->tb_lookup() is consulted; `ret` is initialised to
 * RTN_BROADCAST.
 */
171 unsigned inet_addr_type(u32 addr)
174 struct fib_result res;
175 unsigned ret = RTN_BROADCAST;
177 if (ZERONET(addr) || BADCLASS(addr))
178 return RTN_BROADCAST;
180 return RTN_MULTICAST;
182 memset(&key, 0, sizeof(key));
184 #ifdef CONFIG_IP_MULTIPLE_TABLES
190 if (local_table->tb_lookup(local_table, &key, &res) == 0) {
198 /* Given (packet source, input interface) and optional (dst, oif, tos):
199 - (main) check that the source is valid, i.e. not broadcast or our local
201 - figure out which "logical" interface this packet arrived on
202 and calculate the "specific destination" address.
203 - check that the packet arrived from the expected physical interface.
/*
 * Validate the source address `src` of a packet received on `dev`.
 *
 * Outputs:
 *   *spec_dst - FIB_RES_PREFSRC() of the matching route, or
 *               inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE) on the path
 *               that ends the function;
 *   itag      - filled in by fib_combine_itag() from the lookup result.
 *
 * Visible flow:
 *  - the lookup key uses RT_SCOPE_UNIVERSE;
 *  - per-device state (whether the device has no addresses, and the
 *    IN_DEV_RPFILTER setting) is sampled under read_lock(&inetdev_lock);
 *  - a fib_lookup() is made and its result type compared with
 *    RTN_UNICAST; when the result's device is `dev` (or, with
 *    CONFIG_IP_ROUTE_MULTIPATH, the route has more than one nexthop),
 *    `ret` is set to (nexthop scope >= RT_SCOPE_HOST);
 *  - a further fib_lookup() is made with key.oif = dev->ifindex, and a
 *    unicast result again sets *spec_dst and `ret` the same way.
 * NOTE(review): error returns and the role of `rpf`/`no_addr` in choosing
 * between these paths should be confirmed against the complete function.
 */
206 int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
207 struct net_device *dev, u32 *spec_dst, u32 *itag)
209 struct in_device *in_dev;
211 struct fib_result res;
220 key.scope = RT_SCOPE_UNIVERSE;
223 read_lock(&inetdev_lock);
224 in_dev = __in_dev_get(dev);
226 no_addr = in_dev->ifa_list == NULL;
227 rpf = IN_DEV_RPFILTER(in_dev);
229 read_unlock(&inetdev_lock);
234 if (fib_lookup(&key, &res))
236 if (res.type != RTN_UNICAST)
238 *spec_dst = FIB_RES_PREFSRC(res);
239 fib_combine_itag(itag, &res);
240 #ifdef CONFIG_IP_ROUTE_MULTIPATH
241 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
243 if (FIB_RES_DEV(res) == dev)
246 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
255 key.oif = dev->ifindex;
258 if (fib_lookup(&key, &res) == 0) {
259 if (res.type == RTN_UNICAST) {
260 *spec_dst = FIB_RES_PREFSRC(res);
261 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
270 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
280 #ifndef CONFIG_IP_NOSIOCRT
283 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
/*
 * Handle the SIOCADDRT / SIOCDELRT routing ioctls.
 *
 * cmd - ioctl command; arg - user-space pointer to a struct rtentry.
 * The caller must have CAP_NET_ADMIN.  The rtentry is copied in with
 * copy_from_user() and converted by fib_convert_rtentry() into the
 * request (req.nlh, req.rtm) and attribute (rta) form that the table
 * operations take.
 * SIOCDELRT fetches the table with fib_get_table() and calls
 * tb->tb_delete(); the insert path obtains the table with
 * fib_new_table() and calls tb->tb_insert().  `err` carries the result.
 */
286 int ip_rt_ioctl(unsigned int cmd, void *arg)
297 case SIOCADDRT: /* Add a route */
298 case SIOCDELRT: /* Delete a route */
299 if (!capable(CAP_NET_ADMIN))
301 if (copy_from_user(&r, arg, sizeof(struct rtentry)))
304 err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
306 if (cmd == SIOCDELRT) {
307 struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
310 err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
312 struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
315 err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
/* NOTE(review): second definition with the same signature; presumably the
   CONFIG_IP_NOSIOCRT variant -- confirm. */
328 int ip_rt_ioctl(unsigned int cmd, void *arg)
/*
 * Check the attribute array of a routing message and rewrite it in place.
 * Attributes are visited with a 1-based index i (attribute i lives in
 * rta[i-1]), for i from 1 to RTA_MAX.  A payload shorter than 4 bytes is
 * tested for.  For every attribute other than RTA_MULTIPATH and
 * RTA_METRICS the slot is replaced by a pointer to the attribute's data
 * (RTA_DATA) instead of its header.
 */
335 static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
339 for (i=1; i<=RTA_MAX; i++) {
340 struct rtattr *attr = rta[i-1];
342 if (RTA_PAYLOAD(attr) < 4)
344 if (i != RTA_MULTIPATH && i != RTA_METRICS)
345 rta[i-1] = (struct rtattr*)RTA_DATA(attr);
/*
 * Delete the route described by netlink message `nlh`.
 * `arg` is used as the attribute array and the rtmsg is taken from the
 * message payload (NLMSG_DATA).  The attributes are passed through
 * inet_check_attr(), the table named by r->rtm_table is fetched with
 * fib_get_table(), and the result of tb->tb_delete() is returned.
 */
351 int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
353 struct fib_table * tb;
354 struct rtattr **rta = arg;
355 struct rtmsg *r = NLMSG_DATA(nlh);
357 if (inet_check_attr(r, rta))
360 tb = fib_get_table(r->rtm_table);
362 return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
/*
 * Insert the route described by netlink message `nlh`.
 * `arg` is used as the attribute array and the rtmsg is taken from the
 * message payload (NLMSG_DATA).  The attributes are passed through
 * inet_check_attr(), the table named by r->rtm_table is obtained with
 * fib_new_table(), and the result of tb->tb_insert() is returned.
 */
366 int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
368 struct fib_table * tb;
369 struct rtattr **rta = arg;
370 struct rtmsg *r = NLMSG_DATA(nlh);
372 if (inet_check_attr(r, rta))
375 tb = fib_new_table(r->rtm_table);
377 return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
/*
 * Dump FIB tables into `skb` on behalf of netlink dump callback `cb`.
 * If the request payload is at least a struct rtmsg and that rtmsg has
 * RTM_F_CLONED set, the request is handed to ip_rt_dump() instead.
 * Otherwise tables s_t..RT_TABLE_MAX are walked; s_t is tied to
 * cb->args[0] (which appears to carry the starting table id), the
 * remaining cb->args slots are zeroed inside the loop, each id is
 * resolved with fib_get_table(), and a negative tb->tb_dump() result is
 * tested for.
 */
381 int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
385 struct fib_table *tb;
387 if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
388 ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
389 return ip_rt_dump(skb, cb);
393 s_t = cb->args[0] = RT_TABLE_MIN;
395 for (t=s_t; t<=RT_TABLE_MAX; t++) {
396 if (t < s_t) continue;
398 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
399 if ((tb = fib_get_table(t))==NULL)
401 if (tb->tb_dump(tb, skb, cb) < 0)
410 /* Prepare and feed an intra-kernel routing request.
411 Really, it should be a netlink message, but :-( netlink
412 may not be configured, so we feed it directly
413 to the fib engine. This is legal because all events occur
414 only when netlink is already locked.
/*
 * Build a route request in kernel memory and apply it directly to a table.
 *
 *   cmd     - RTM_NEWROUTE selects tb_insert(); the other path calls
 *             tb_delete().  Also stored as the message type.
 *   type    - route type; RTN_UNICAST uses RT_TABLE_MAIN, the other
 *             path uses RT_TABLE_LOCAL.
 *   dst     - destination address of the route.
 *   dst_len - destination prefix length, stored in rtm_dst_len.
 *   ifa     - interface address supplying the preferred source
 *             (ifa_local) and the output interface index.
 *
 * The request is flagged NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND, marked
 * RTPROT_KERNEL, and scoped RT_SCOPE_HOST for RTN_LOCAL routes and
 * RT_SCOPE_LINK for every other type.  The results of tb_insert() and
 * tb_delete() are not used.
 */
417 static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
419 struct fib_table * tb;
426 memset(&req.rtm, 0, sizeof(req.rtm));
427 memset(&rta, 0, sizeof(rta));
429 if (type == RTN_UNICAST)
430 tb = fib_new_table(RT_TABLE_MAIN);
432 tb = fib_new_table(RT_TABLE_LOCAL);
437 req.nlh.nlmsg_len = sizeof(req);
438 req.nlh.nlmsg_type = cmd;
439 req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
440 req.nlh.nlmsg_pid = 0;
441 req.nlh.nlmsg_seq = 0;
443 req.rtm.rtm_dst_len = dst_len;
444 req.rtm.rtm_table = tb->tb_id;
445 req.rtm.rtm_protocol = RTPROT_KERNEL;
446 req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
447 req.rtm.rtm_type = type;
450 rta.rta_prefsrc = &ifa->ifa_local;
451 rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
453 if (cmd == RTM_NEWROUTE)
454 tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
456 tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
/*
 * Add the FIB routes that correspond to interface address `ifa`.
 *
 * For a secondary address (IFA_F_SECONDARY) the primary address on the
 * same prefix is looked up with inet_ifa_byprefix() and used as `prim`;
 * a NULL result is logged as a bug.
 * Routes are added through fib_magic(RTM_NEWROUTE, ...):
 *  - a /32 RTN_LOCAL route for the address itself;
 *  - following the IFF_UP test on the device, a /32 RTN_BROADCAST route
 *    for an explicitly configured broadcast address other than
 *    0xFFFFFFFF;
 *  - for a primary address whose prefix is not in ZERONET (and unless
 *    prefix == addr with a 32-bit prefix length), the prefix route:
 *    RTN_LOCAL on loopback devices, RTN_UNICAST otherwise;
 *  - when ifa_prefixlen < 31, /32 RTN_BROADCAST routes for the
 *    all-zeros (prefix) and all-ones (prefix|~mask) host addresses.
 */
459 static void fib_add_ifaddr(struct in_ifaddr *ifa)
461 struct in_device *in_dev = ifa->ifa_dev;
462 struct net_device *dev = in_dev->dev;
463 struct in_ifaddr *prim = ifa;
464 u32 mask = ifa->ifa_mask;
465 u32 addr = ifa->ifa_local;
466 u32 prefix = ifa->ifa_address&mask;
468 if (ifa->ifa_flags&IFA_F_SECONDARY) {
469 prim = inet_ifa_byprefix(in_dev, prefix, mask);
471 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
476 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
478 if (!(dev->flags&IFF_UP))
481 /* Add broadcast address, if it is explicitly assigned. */
482 if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
483 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
485 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
486 (prefix != addr || ifa->ifa_prefixlen < 32)) {
487 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
488 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
490 /* Add network specific broadcasts, when it takes a sense */
491 if (ifa->ifa_prefixlen < 31) {
492 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
493 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
/*
 * Remove the FIB routes that correspond to interface address `ifa`.
 *
 * `brd` is the all-ones host address of ifa's prefix and `any` the
 * all-zeros one.  For a primary address the prefix route is deleted
 * (RTN_LOCAL on loopback devices, RTN_UNICAST otherwise); the other path
 * resolves `prim` with inet_ifa_byprefix() and logs a bug when it is NULL.
 * The device's address list is then scanned, comparing ifa's local and
 * broadcast addresses, `brd` and `any` against each entry, before the /32
 * RTN_BROADCAST routes and (when LOCAL_OK is not set in `ok`) the /32
 * RTN_LOCAL route are deleted through fib_magic(RTM_DELROUTE, ...).
 * If inet_addr_type() then no longer reports the address as RTN_LOCAL,
 * fib_sync_down() is called for that address and its result tested.
 */
498 static void fib_del_ifaddr(struct in_ifaddr *ifa)
500 struct in_device *in_dev = ifa->ifa_dev;
501 struct net_device *dev = in_dev->dev;
502 struct in_ifaddr *ifa1;
503 struct in_ifaddr *prim = ifa;
504 u32 brd = ifa->ifa_address|~ifa->ifa_mask;
505 u32 any = ifa->ifa_address&ifa->ifa_mask;
512 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
513 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
514 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
516 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
518 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
523 /* Deletion is more complicated than add.
524 We should take care of not to delete too much :-)
526 Scan address list to be sure that addresses are really gone.
529 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
530 if (ifa->ifa_local == ifa1->ifa_local)
532 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
534 if (brd == ifa1->ifa_broadcast)
536 if (any == ifa1->ifa_broadcast)
541 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
543 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
545 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
546 if (!(ok&LOCAL_OK)) {
547 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
549 /* Check, that this local address finally disappeared. */
550 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
551 /* And the last, but not the least thing.
552 We must flush stray FIB entries.
554 First of all, we scan fib_info list searching
555 for stray nexthop entries, then ignite fib_flush.
557 if (fib_sync_down(ifa->ifa_local, NULL, 0))
/*
 * Take down the FIB state associated with `dev`.
 * Calls fib_sync_down(0, dev, force) and tests its result.
 * NOTE(review): callers in this file pass force = 0, 1 or 2; the meaning
 * of each level is defined by fib_sync_down() -- confirm there.
 */
567 static void fib_disable_ip(struct net_device *dev, int force)
569 if (fib_sync_down(0, dev, force))
/*
 * Notifier callback for interface address events.
 * `ptr` is the affected struct in_ifaddr.  With
 * CONFIG_IP_ROUTE_MULTIPATH, fib_sync_up() is called for the address's
 * device.  When the address's in_device has no addresses left
 * (ifa_list == NULL), fib_disable_ip() is called on its device with
 * force 1.
 * NOTE(review): which event value selects each action should be
 * confirmed against the complete function.
 */
575 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
577 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
582 #ifdef CONFIG_IP_ROUTE_MULTIPATH
583 fib_sync_up(ifa->ifa_dev->dev);
589 if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
590 /* Last address was deleted from this interface.
593 fib_disable_ip(ifa->ifa_dev->dev, 1);
/*
 * Notifier callback for network device events.
 * `ptr` is the affected net_device; its in_device is obtained with
 * __in_dev_get().  NETDEV_UNREGISTER calls fib_disable_ip(dev, 2).
 * The other visible actions are an iteration over the device's addresses
 * (closed by endfor_ifa), a fib_disable_ip(dev, 0) call, and a
 * NETDEV_CHANGEMTU case.
 * NOTE(review): the event-to-action mapping for those should be confirmed
 * against the complete function.
 */
602 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
604 struct net_device *dev = ptr;
605 struct in_device *in_dev = __in_dev_get(dev);
607 if (event == NETDEV_UNREGISTER) {
608 fib_disable_ip(dev, 2);
619 } endfor_ifa(in_dev);
620 #ifdef CONFIG_IP_ROUTE_MULTIPATH
626 fib_disable_ip(dev, 0);
628 case NETDEV_CHANGEMTU:
/* Notifier block whose callback is fib_inetaddr_event(). */
636 struct notifier_block fib_inetaddr_notifier = {
637 notifier_call: fib_inetaddr_event,
/* Notifier block whose callback is fib_netdev_event(). */
640 struct notifier_block fib_netdev_notifier = {
641 notifier_call: fib_netdev_event,
/*
 * Initialise the FIB frontend (marked __init).
 * When CONFIG_PROC_FS is set, registers the "route" proc entry backed by
 * fib_get_procinfo().  When CONFIG_IP_MULTIPLE_TABLES is not set, creates
 * local_table and main_table with fib_hash_init().  Finally registers
 * the netdevice and inetaddr notifiers declared in this file.
 */
644 void __init ip_fib_init(void)
646 #ifdef CONFIG_PROC_FS
647 proc_net_create("route",0,fib_get_procinfo);
648 #endif /* CONFIG_PROC_FS */
650 #ifndef CONFIG_IP_MULTIPLE_TABLES
651 local_table = fib_hash_init(RT_TABLE_LOCAL);
652 main_table = fib_hash_init(RT_TABLE_MAIN);
657 register_netdevice_notifier(&fib_netdev_notifier);
658 register_inetaddr_notifier(&fib_inetaddr_notifier);