OSDN Git Service

ceph: fix use-after-free on symlink traversal
[uclinux-h8/linux.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <linux/jhash.h>
48 #include <net/net_namespace.h>
49 #include <net/snmp.h>
50 #include <net/ipv6.h>
51 #include <net/ip6_fib.h>
52 #include <net/ip6_route.h>
53 #include <net/ndisc.h>
54 #include <net/addrconf.h>
55 #include <net/tcp.h>
56 #include <linux/rtnetlink.h>
57 #include <net/dst.h>
58 #include <net/dst_metadata.h>
59 #include <net/xfrm.h>
60 #include <net/netevent.h>
61 #include <net/netlink.h>
62 #include <net/nexthop.h>
63 #include <net/lwtunnel.h>
64 #include <net/ip_tunnels.h>
65 #include <net/l3mdev.h>
66 #include <net/ip.h>
67 #include <linux/uaccess.h>
68
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72
73 static int ip6_rt_type_to_error(u8 fib6_type);
74
75 #define CREATE_TRACE_POINTS
76 #include <trace/events/fib6.h>
77 EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78 #undef CREATE_TRACE_POINTS
79
/* Result codes of the next-hop neighbour check. Every failure is negative,
 * so callers can test "< 0"; the single success value is positive.
 */
enum rt6_nud_state {
	RT6_NUD_SUCCEED		=  1,
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() requests round-robin */
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
};
86
87 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
88 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
89 static unsigned int      ip6_mtu(const struct dst_entry *dst);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(struct dst_ops *ops);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
98 static int              ip6_pkt_prohibit(struct sk_buff *skb);
99 static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
100 static void             ip6_link_failure(struct sk_buff *skb);
101 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102                                            struct sk_buff *skb, u32 mtu);
103 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104                                         struct sk_buff *skb);
105 static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
106 static size_t rt6_nlmsg_size(struct fib6_info *rt);
107 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
108                          struct fib6_info *rt, struct dst_entry *dst,
109                          struct in6_addr *dest, struct in6_addr *src,
110                          int iif, int type, u32 portid, u32 seq,
111                          unsigned int flags);
112 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
113                                            struct in6_addr *daddr,
114                                            struct in6_addr *saddr);
115
116 #ifdef CONFIG_IPV6_ROUTE_INFO
117 static struct fib6_info *rt6_add_route_info(struct net *net,
118                                            const struct in6_addr *prefix, int prefixlen,
119                                            const struct in6_addr *gwaddr,
120                                            struct net_device *dev,
121                                            unsigned int pref);
122 static struct fib6_info *rt6_get_route_info(struct net *net,
123                                            const struct in6_addr *prefix, int prefixlen,
124                                            const struct in6_addr *gwaddr,
125                                            struct net_device *dev);
126 #endif
127
128 struct uncached_list {
129         spinlock_t              lock;
130         struct list_head        head;
131 };
132
133 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
134
135 void rt6_uncached_list_add(struct rt6_info *rt)
136 {
137         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
138
139         rt->rt6i_uncached_list = ul;
140
141         spin_lock_bh(&ul->lock);
142         list_add_tail(&rt->rt6i_uncached, &ul->head);
143         spin_unlock_bh(&ul->lock);
144 }
145
146 void rt6_uncached_list_del(struct rt6_info *rt)
147 {
148         if (!list_empty(&rt->rt6i_uncached)) {
149                 struct uncached_list *ul = rt->rt6i_uncached_list;
150                 struct net *net = dev_net(rt->dst.dev);
151
152                 spin_lock_bh(&ul->lock);
153                 list_del(&rt->rt6i_uncached);
154                 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
155                 spin_unlock_bh(&ul->lock);
156         }
157 }
158
159 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
160 {
161         struct net_device *loopback_dev = net->loopback_dev;
162         int cpu;
163
164         if (dev == loopback_dev)
165                 return;
166
167         for_each_possible_cpu(cpu) {
168                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
169                 struct rt6_info *rt;
170
171                 spin_lock_bh(&ul->lock);
172                 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
173                         struct inet6_dev *rt_idev = rt->rt6i_idev;
174                         struct net_device *rt_dev = rt->dst.dev;
175
176                         if (rt_idev->dev == dev) {
177                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
178                                 in6_dev_put(rt_idev);
179                         }
180
181                         if (rt_dev == dev) {
182                                 rt->dst.dev = loopback_dev;
183                                 dev_hold(rt->dst.dev);
184                                 dev_put(rt_dev);
185                         }
186                 }
187                 spin_unlock_bh(&ul->lock);
188         }
189 }
190
191 static inline const void *choose_neigh_daddr(const struct in6_addr *p,
192                                              struct sk_buff *skb,
193                                              const void *daddr)
194 {
195         if (!ipv6_addr_any(p))
196                 return (const void *) p;
197         else if (skb)
198                 return &ipv6_hdr(skb)->daddr;
199         return daddr;
200 }
201
202 struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203                                    struct net_device *dev,
204                                    struct sk_buff *skb,
205                                    const void *daddr)
206 {
207         struct neighbour *n;
208
209         daddr = choose_neigh_daddr(gw, skb, daddr);
210         n = __ipv6_neigh_lookup(dev, daddr);
211         if (n)
212                 return n;
213
214         n = neigh_create(&nd_tbl, daddr, dev);
215         return IS_ERR(n) ? NULL : n;
216 }
217
218 static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
219                                               struct sk_buff *skb,
220                                               const void *daddr)
221 {
222         const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
223
224         return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
225 }
226
227 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
228 {
229         struct net_device *dev = dst->dev;
230         struct rt6_info *rt = (struct rt6_info *)dst;
231
232         daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
233         if (!daddr)
234                 return;
235         if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
236                 return;
237         if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
238                 return;
239         __ipv6_confirm_neigh(dev, daddr);
240 }
241
242 static struct dst_ops ip6_dst_ops_template = {
243         .family                 =       AF_INET6,
244         .gc                     =       ip6_dst_gc,
245         .gc_thresh              =       1024,
246         .check                  =       ip6_dst_check,
247         .default_advmss         =       ip6_default_advmss,
248         .mtu                    =       ip6_mtu,
249         .cow_metrics            =       dst_cow_metrics_generic,
250         .destroy                =       ip6_dst_destroy,
251         .ifdown                 =       ip6_dst_ifdown,
252         .negative_advice        =       ip6_negative_advice,
253         .link_failure           =       ip6_link_failure,
254         .update_pmtu            =       ip6_rt_update_pmtu,
255         .redirect               =       rt6_do_redirect,
256         .local_out              =       __ip6_local_out,
257         .neigh_lookup           =       ip6_dst_neigh_lookup,
258         .confirm_neigh          =       ip6_confirm_neigh,
259 };
260
261 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
262 {
263         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
264
265         return mtu ? : dst->dev->mtu;
266 }
267
268 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
269                                          struct sk_buff *skb, u32 mtu)
270 {
271 }
272
/* redirect hook for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* empty on purpose */
}
277
278 static struct dst_ops ip6_dst_blackhole_ops = {
279         .family                 =       AF_INET6,
280         .destroy                =       ip6_dst_destroy,
281         .check                  =       ip6_dst_check,
282         .mtu                    =       ip6_blackhole_mtu,
283         .default_advmss         =       ip6_default_advmss,
284         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
285         .redirect               =       ip6_rt_blackhole_redirect,
286         .cow_metrics            =       dst_cow_metrics_generic,
287         .neigh_lookup           =       ip6_dst_neigh_lookup,
288 };
289
290 static const u32 ip6_template_metrics[RTAX_MAX] = {
291         [RTAX_HOPLIMIT - 1] = 0,
292 };
293
294 static const struct fib6_info fib6_null_entry_template = {
295         .fib6_flags     = (RTF_REJECT | RTF_NONEXTHOP),
296         .fib6_protocol  = RTPROT_KERNEL,
297         .fib6_metric    = ~(u32)0,
298         .fib6_ref       = ATOMIC_INIT(1),
299         .fib6_type      = RTN_UNREACHABLE,
300         .fib6_metrics   = (struct dst_metrics *)&dst_default_metrics,
301 };
302
303 static const struct rt6_info ip6_null_entry_template = {
304         .dst = {
305                 .__refcnt       = ATOMIC_INIT(1),
306                 .__use          = 1,
307                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
308                 .error          = -ENETUNREACH,
309                 .input          = ip6_pkt_discard,
310                 .output         = ip6_pkt_discard_out,
311         },
312         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
313 };
314
315 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
316
317 static const struct rt6_info ip6_prohibit_entry_template = {
318         .dst = {
319                 .__refcnt       = ATOMIC_INIT(1),
320                 .__use          = 1,
321                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
322                 .error          = -EACCES,
323                 .input          = ip6_pkt_prohibit,
324                 .output         = ip6_pkt_prohibit_out,
325         },
326         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
327 };
328
329 static const struct rt6_info ip6_blk_hole_entry_template = {
330         .dst = {
331                 .__refcnt       = ATOMIC_INIT(1),
332                 .__use          = 1,
333                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
334                 .error          = -EINVAL,
335                 .input          = dst_discard,
336                 .output         = dst_discard_out,
337         },
338         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
339 };
340
341 #endif
342
343 static void rt6_info_init(struct rt6_info *rt)
344 {
345         struct dst_entry *dst = &rt->dst;
346
347         memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
348         INIT_LIST_HEAD(&rt->rt6i_uncached);
349 }
350
351 /* allocate dst with ip6_dst_ops */
352 struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
353                                int flags)
354 {
355         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
356                                         1, DST_OBSOLETE_FORCE_CHK, flags);
357
358         if (rt) {
359                 rt6_info_init(rt);
360                 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
361         }
362
363         return rt;
364 }
365 EXPORT_SYMBOL(ip6_dst_alloc);
366
367 static void ip6_dst_destroy(struct dst_entry *dst)
368 {
369         struct rt6_info *rt = (struct rt6_info *)dst;
370         struct fib6_info *from;
371         struct inet6_dev *idev;
372
373         ip_dst_metrics_put(dst);
374         rt6_uncached_list_del(rt);
375
376         idev = rt->rt6i_idev;
377         if (idev) {
378                 rt->rt6i_idev = NULL;
379                 in6_dev_put(idev);
380         }
381
382         rcu_read_lock();
383         from = rcu_dereference(rt->from);
384         rcu_assign_pointer(rt->from, NULL);
385         fib6_info_release(from);
386         rcu_read_unlock();
387 }
388
389 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
390                            int how)
391 {
392         struct rt6_info *rt = (struct rt6_info *)dst;
393         struct inet6_dev *idev = rt->rt6i_idev;
394         struct net_device *loopback_dev =
395                 dev_net(dev)->loopback_dev;
396
397         if (idev && idev->dev != loopback_dev) {
398                 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
399                 if (loopback_idev) {
400                         rt->rt6i_idev = loopback_idev;
401                         in6_dev_put(idev);
402                 }
403         }
404 }
405
406 static bool __rt6_check_expired(const struct rt6_info *rt)
407 {
408         if (rt->rt6i_flags & RTF_EXPIRES)
409                 return time_after(jiffies, rt->dst.expires);
410         else
411                 return false;
412 }
413
414 static bool rt6_check_expired(const struct rt6_info *rt)
415 {
416         struct fib6_info *from;
417
418         from = rcu_dereference(rt->from);
419
420         if (rt->rt6i_flags & RTF_EXPIRES) {
421                 if (time_after(jiffies, rt->dst.expires))
422                         return true;
423         } else if (from) {
424                 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
425                         fib6_check_expired(from);
426         }
427         return false;
428 }
429
430 struct fib6_info *fib6_multipath_select(const struct net *net,
431                                         struct fib6_info *match,
432                                         struct flowi6 *fl6, int oif,
433                                         const struct sk_buff *skb,
434                                         int strict)
435 {
436         struct fib6_info *sibling, *next_sibling;
437
438         /* We might have already computed the hash for ICMPv6 errors. In such
439          * case it will always be non-zero. Otherwise now is the time to do it.
440          */
441         if (!fl6->mp_hash)
442                 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
443
444         if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
445                 return match;
446
447         list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
448                                  fib6_siblings) {
449                 int nh_upper_bound;
450
451                 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
452                 if (fl6->mp_hash > nh_upper_bound)
453                         continue;
454                 if (rt6_score_route(sibling, oif, strict) < 0)
455                         break;
456                 match = sibling;
457                 break;
458         }
459
460         return match;
461 }
462
463 /*
464  *      Route lookup. rcu_read_lock() should be held.
465  */
466
467 static inline struct fib6_info *rt6_device_match(struct net *net,
468                                                  struct fib6_info *rt,
469                                                     const struct in6_addr *saddr,
470                                                     int oif,
471                                                     int flags)
472 {
473         struct fib6_info *sprt;
474
475         if (!oif && ipv6_addr_any(saddr) &&
476             !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
477                 return rt;
478
479         for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
480                 const struct net_device *dev = sprt->fib6_nh.nh_dev;
481
482                 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
483                         continue;
484
485                 if (oif) {
486                         if (dev->ifindex == oif)
487                                 return sprt;
488                 } else {
489                         if (ipv6_chk_addr(net, saddr, dev,
490                                           flags & RT6_LOOKUP_F_IFACE))
491                                 return sprt;
492                 }
493         }
494
495         if (oif && flags & RT6_LOOKUP_F_IFACE)
496                 return net->ipv6.fib6_null_entry;
497
498         return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
499 }
500
501 #ifdef CONFIG_IPV6_ROUTER_PREF
502 struct __rt6_probe_work {
503         struct work_struct work;
504         struct in6_addr target;
505         struct net_device *dev;
506 };
507
508 static void rt6_probe_deferred(struct work_struct *w)
509 {
510         struct in6_addr mcaddr;
511         struct __rt6_probe_work *work =
512                 container_of(w, struct __rt6_probe_work, work);
513
514         addrconf_addr_solict_mult(&work->target, &mcaddr);
515         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
516         dev_put(work->dev);
517         kfree(work);
518 }
519
520 static void rt6_probe(struct fib6_info *rt)
521 {
522         struct __rt6_probe_work *work = NULL;
523         const struct in6_addr *nh_gw;
524         struct neighbour *neigh;
525         struct net_device *dev;
526         struct inet6_dev *idev;
527
528         /*
529          * Okay, this does not seem to be appropriate
530          * for now, however, we need to check if it
531          * is really so; aka Router Reachability Probing.
532          *
533          * Router Reachability Probe MUST be rate-limited
534          * to no more than one per minute.
535          */
536         if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
537                 return;
538
539         nh_gw = &rt->fib6_nh.nh_gw;
540         dev = rt->fib6_nh.nh_dev;
541         rcu_read_lock_bh();
542         idev = __in6_dev_get(dev);
543         neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
544         if (neigh) {
545                 if (neigh->nud_state & NUD_VALID)
546                         goto out;
547
548                 write_lock(&neigh->lock);
549                 if (!(neigh->nud_state & NUD_VALID) &&
550                     time_after(jiffies,
551                                neigh->updated + idev->cnf.rtr_probe_interval)) {
552                         work = kmalloc(sizeof(*work), GFP_ATOMIC);
553                         if (work)
554                                 __neigh_set_probe_once(neigh);
555                 }
556                 write_unlock(&neigh->lock);
557         } else if (time_after(jiffies, rt->last_probe +
558                                        idev->cnf.rtr_probe_interval)) {
559                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
560         }
561
562         if (work) {
563                 rt->last_probe = jiffies;
564                 INIT_WORK(&work->work, rt6_probe_deferred);
565                 work->target = *nh_gw;
566                 dev_hold(dev);
567                 work->dev = dev;
568                 schedule_work(&work->work);
569         }
570
571 out:
572         rcu_read_unlock_bh();
573 }
574 #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
	/* nothing to do */
}
578 #endif
579
580 /*
581  * Default Router Selection (RFC 2461 6.3.6)
582  */
583 static inline int rt6_check_dev(struct fib6_info *rt, int oif)
584 {
585         const struct net_device *dev = rt->fib6_nh.nh_dev;
586
587         if (!oif || dev->ifindex == oif)
588                 return 2;
589         return 0;
590 }
591
592 static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
593 {
594         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
595         struct neighbour *neigh;
596
597         if (rt->fib6_flags & RTF_NONEXTHOP ||
598             !(rt->fib6_flags & RTF_GATEWAY))
599                 return RT6_NUD_SUCCEED;
600
601         rcu_read_lock_bh();
602         neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
603                                           &rt->fib6_nh.nh_gw);
604         if (neigh) {
605                 read_lock(&neigh->lock);
606                 if (neigh->nud_state & NUD_VALID)
607                         ret = RT6_NUD_SUCCEED;
608 #ifdef CONFIG_IPV6_ROUTER_PREF
609                 else if (!(neigh->nud_state & NUD_FAILED))
610                         ret = RT6_NUD_SUCCEED;
611                 else
612                         ret = RT6_NUD_FAIL_PROBE;
613 #endif
614                 read_unlock(&neigh->lock);
615         } else {
616                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
617                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
618         }
619         rcu_read_unlock_bh();
620
621         return ret;
622 }
623
624 static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
625 {
626         int m;
627
628         m = rt6_check_dev(rt, oif);
629         if (!m && (strict & RT6_LOOKUP_F_IFACE))
630                 return RT6_NUD_FAIL_HARD;
631 #ifdef CONFIG_IPV6_ROUTER_PREF
632         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
633 #endif
634         if (strict & RT6_LOOKUP_F_REACHABLE) {
635                 int n = rt6_check_neigh(rt);
636                 if (n < 0)
637                         return n;
638         }
639         return m;
640 }
641
642 /* called with rc_read_lock held */
643 static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
644 {
645         const struct net_device *dev = fib6_info_nh_dev(f6i);
646         bool rc = false;
647
648         if (dev) {
649                 const struct inet6_dev *idev = __in6_dev_get(dev);
650
651                 rc = !!idev->cnf.ignore_routes_with_linkdown;
652         }
653
654         return rc;
655 }
656
657 static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
658                                    int *mpri, struct fib6_info *match,
659                                    bool *do_rr)
660 {
661         int m;
662         bool match_do_rr = false;
663
664         if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
665                 goto out;
666
667         if (fib6_ignore_linkdown(rt) &&
668             rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
669             !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
670                 goto out;
671
672         if (fib6_check_expired(rt))
673                 goto out;
674
675         m = rt6_score_route(rt, oif, strict);
676         if (m == RT6_NUD_FAIL_DO_RR) {
677                 match_do_rr = true;
678                 m = 0; /* lowest valid score */
679         } else if (m == RT6_NUD_FAIL_HARD) {
680                 goto out;
681         }
682
683         if (strict & RT6_LOOKUP_F_REACHABLE)
684                 rt6_probe(rt);
685
686         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
687         if (m > *mpri) {
688                 *do_rr = match_do_rr;
689                 *mpri = m;
690                 match = rt;
691         }
692 out:
693         return match;
694 }
695
696 static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
697                                      struct fib6_info *leaf,
698                                      struct fib6_info *rr_head,
699                                      u32 metric, int oif, int strict,
700                                      bool *do_rr)
701 {
702         struct fib6_info *rt, *match, *cont;
703         int mpri = -1;
704
705         match = NULL;
706         cont = NULL;
707         for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
708                 if (rt->fib6_metric != metric) {
709                         cont = rt;
710                         break;
711                 }
712
713                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
714         }
715
716         for (rt = leaf; rt && rt != rr_head;
717              rt = rcu_dereference(rt->fib6_next)) {
718                 if (rt->fib6_metric != metric) {
719                         cont = rt;
720                         break;
721                 }
722
723                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
724         }
725
726         if (match || !cont)
727                 return match;
728
729         for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
730                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
731
732         return match;
733 }
734
735 static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
736                                    int oif, int strict)
737 {
738         struct fib6_info *leaf = rcu_dereference(fn->leaf);
739         struct fib6_info *match, *rt0;
740         bool do_rr = false;
741         int key_plen;
742
743         if (!leaf || leaf == net->ipv6.fib6_null_entry)
744                 return net->ipv6.fib6_null_entry;
745
746         rt0 = rcu_dereference(fn->rr_ptr);
747         if (!rt0)
748                 rt0 = leaf;
749
750         /* Double check to make sure fn is not an intermediate node
751          * and fn->leaf does not points to its child's leaf
752          * (This might happen if all routes under fn are deleted from
753          * the tree and fib6_repair_tree() is called on the node.)
754          */
755         key_plen = rt0->fib6_dst.plen;
756 #ifdef CONFIG_IPV6_SUBTREES
757         if (rt0->fib6_src.plen)
758                 key_plen = rt0->fib6_src.plen;
759 #endif
760         if (fn->fn_bit != key_plen)
761                 return net->ipv6.fib6_null_entry;
762
763         match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
764                              &do_rr);
765
766         if (do_rr) {
767                 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
768
769                 /* no entries matched; do round-robin */
770                 if (!next || next->fib6_metric != rt0->fib6_metric)
771                         next = leaf;
772
773                 if (next != rt0) {
774                         spin_lock_bh(&leaf->fib6_table->tb6_lock);
775                         /* make sure next is not being deleted from the tree */
776                         if (next->fib6_node)
777                                 rcu_assign_pointer(fn->rr_ptr, next);
778                         spin_unlock_bh(&leaf->fib6_table->tb6_lock);
779                 }
780         }
781
782         return match ? match : net->ipv6.fib6_null_entry;
783 }
784
785 static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
786 {
787         return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
788 }
789
790 #ifdef CONFIG_IPV6_ROUTE_INFO
791 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
792                   const struct in6_addr *gwaddr)
793 {
794         struct net *net = dev_net(dev);
795         struct route_info *rinfo = (struct route_info *) opt;
796         struct in6_addr prefix_buf, *prefix;
797         unsigned int pref;
798         unsigned long lifetime;
799         struct fib6_info *rt;
800
801         if (len < sizeof(struct route_info)) {
802                 return -EINVAL;
803         }
804
805         /* Sanity check for prefix_len and length */
806         if (rinfo->length > 3) {
807                 return -EINVAL;
808         } else if (rinfo->prefix_len > 128) {
809                 return -EINVAL;
810         } else if (rinfo->prefix_len > 64) {
811                 if (rinfo->length < 2) {
812                         return -EINVAL;
813                 }
814         } else if (rinfo->prefix_len > 0) {
815                 if (rinfo->length < 1) {
816                         return -EINVAL;
817                 }
818         }
819
820         pref = rinfo->route_pref;
821         if (pref == ICMPV6_ROUTER_PREF_INVALID)
822                 return -EINVAL;
823
824         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
825
826         if (rinfo->length == 3)
827                 prefix = (struct in6_addr *)rinfo->prefix;
828         else {
829                 /* this function is safe */
830                 ipv6_addr_prefix(&prefix_buf,
831                                  (struct in6_addr *)rinfo->prefix,
832                                  rinfo->prefix_len);
833                 prefix = &prefix_buf;
834         }
835
836         if (rinfo->prefix_len == 0)
837                 rt = rt6_get_dflt_router(net, gwaddr, dev);
838         else
839                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
840                                         gwaddr, dev);
841
842         if (rt && !lifetime) {
843                 ip6_del_rt(net, rt);
844                 rt = NULL;
845         }
846
847         if (!rt && lifetime)
848                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
849                                         dev, pref);
850         else if (rt)
851                 rt->fib6_flags = RTF_ROUTEINFO |
852                                  (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
853
854         if (rt) {
855                 if (!addrconf_finite_timeout(lifetime))
856                         fib6_clean_expires(rt);
857                 else
858                         fib6_set_expires(rt, jiffies + HZ * lifetime);
859
860                 fib6_info_release(rt);
861         }
862         return 0;
863 }
864 #endif
865
866 /*
867  *      Misc support functions
868  */
869
870 /* called with rcu_lock held */
871 static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
872 {
873         struct net_device *dev = rt->fib6_nh.nh_dev;
874
875         if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
876                 /* for copies of local routes, dst->dev needs to be the
877                  * device if it is a master device, the master device if
878                  * device is enslaved, and the loopback as the default
879                  */
880                 if (netif_is_l3_slave(dev) &&
881                     !rt6_need_strict(&rt->fib6_dst.addr))
882                         dev = l3mdev_master_dev_rcu(dev);
883                 else if (!netif_is_l3_master(dev))
884                         dev = dev_net(dev)->loopback_dev;
885                 /* last case is netif_is_l3_master(dev) is true in which
886                  * case we want dev returned to be dev
887                  */
888         }
889
890         return dev;
891 }
892
893 static const int fib6_prop[RTN_MAX + 1] = {
894         [RTN_UNSPEC]    = 0,
895         [RTN_UNICAST]   = 0,
896         [RTN_LOCAL]     = 0,
897         [RTN_BROADCAST] = 0,
898         [RTN_ANYCAST]   = 0,
899         [RTN_MULTICAST] = 0,
900         [RTN_BLACKHOLE] = -EINVAL,
901         [RTN_UNREACHABLE] = -EHOSTUNREACH,
902         [RTN_PROHIBIT]  = -EACCES,
903         [RTN_THROW]     = -EAGAIN,
904         [RTN_NAT]       = -EINVAL,
905         [RTN_XRESOLVE]  = -EINVAL,
906 };
907
908 static int ip6_rt_type_to_error(u8 fib6_type)
909 {
910         return fib6_prop[fib6_type];
911 }
912
913 static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
914 {
915         unsigned short flags = 0;
916
917         if (rt->dst_nocount)
918                 flags |= DST_NOCOUNT;
919         if (rt->dst_nopolicy)
920                 flags |= DST_NOPOLICY;
921         if (rt->dst_host)
922                 flags |= DST_HOST;
923
924         return flags;
925 }
926
927 static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
928 {
929         rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
930
931         switch (ort->fib6_type) {
932         case RTN_BLACKHOLE:
933                 rt->dst.output = dst_discard_out;
934                 rt->dst.input = dst_discard;
935                 break;
936         case RTN_PROHIBIT:
937                 rt->dst.output = ip6_pkt_prohibit_out;
938                 rt->dst.input = ip6_pkt_prohibit;
939                 break;
940         case RTN_THROW:
941         case RTN_UNREACHABLE:
942         default:
943                 rt->dst.output = ip6_pkt_discard_out;
944                 rt->dst.input = ip6_pkt_discard;
945                 break;
946         }
947 }
948
949 static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
950 {
951         if (ort->fib6_flags & RTF_REJECT) {
952                 ip6_rt_init_dst_reject(rt, ort);
953                 return;
954         }
955
956         rt->dst.error = 0;
957         rt->dst.output = ip6_output;
958
959         if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
960                 rt->dst.input = ip6_input;
961         } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
962                 rt->dst.input = ip6_mc_input;
963         } else {
964                 rt->dst.input = ip6_forward;
965         }
966
967         if (ort->fib6_nh.nh_lwtstate) {
968                 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
969                 lwtunnel_set_redirect(&rt->dst);
970         }
971
972         rt->dst.lastuse = jiffies;
973 }
974
975 /* Caller must already hold reference to @from */
976 static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
977 {
978         rt->rt6i_flags &= ~RTF_EXPIRES;
979         rcu_assign_pointer(rt->from, from);
980         ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
981 }
982
983 /* Caller must already hold reference to @ort */
984 static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
985 {
986         struct net_device *dev = fib6_info_nh_dev(ort);
987
988         ip6_rt_init_dst(rt, ort);
989
990         rt->rt6i_dst = ort->fib6_dst;
991         rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
992         rt->rt6i_gateway = ort->fib6_nh.nh_gw;
993         rt->rt6i_flags = ort->fib6_flags;
994         rt6_set_from(rt, ort);
995 #ifdef CONFIG_IPV6_SUBTREES
996         rt->rt6i_src = ort->fib6_src;
997 #endif
998 }
999
1000 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1001                                         struct in6_addr *saddr)
1002 {
1003         struct fib6_node *pn, *sn;
1004         while (1) {
1005                 if (fn->fn_flags & RTN_TL_ROOT)
1006                         return NULL;
1007                 pn = rcu_dereference(fn->parent);
1008                 sn = FIB6_SUBTREE(pn);
1009                 if (sn && sn != fn)
1010                         fn = fib6_node_lookup(sn, NULL, saddr);
1011                 else
1012                         fn = pn;
1013                 if (fn->fn_flags & RTN_RTINFO)
1014                         return fn;
1015         }
1016 }
1017
1018 static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1019                           bool null_fallback)
1020 {
1021         struct rt6_info *rt = *prt;
1022
1023         if (dst_hold_safe(&rt->dst))
1024                 return true;
1025         if (null_fallback) {
1026                 rt = net->ipv6.ip6_null_entry;
1027                 dst_hold(&rt->dst);
1028         } else {
1029                 rt = NULL;
1030         }
1031         *prt = rt;
1032         return false;
1033 }
1034
1035 /* called with rcu_lock held */
1036 static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
1037 {
1038         unsigned short flags = fib6_info_dst_flags(rt);
1039         struct net_device *dev = rt->fib6_nh.nh_dev;
1040         struct rt6_info *nrt;
1041
1042         if (!fib6_info_hold_safe(rt))
1043                 return NULL;
1044
1045         nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1046         if (nrt)
1047                 ip6_rt_copy_init(nrt, rt);
1048         else
1049                 fib6_info_release(rt);
1050
1051         return nrt;
1052 }
1053
1054 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1055                                              struct fib6_table *table,
1056                                              struct flowi6 *fl6,
1057                                              const struct sk_buff *skb,
1058                                              int flags)
1059 {
1060         struct fib6_info *f6i;
1061         struct fib6_node *fn;
1062         struct rt6_info *rt;
1063
1064         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1065                 flags &= ~RT6_LOOKUP_F_IFACE;
1066
1067         rcu_read_lock();
1068         fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1069 restart:
1070         f6i = rcu_dereference(fn->leaf);
1071         if (!f6i) {
1072                 f6i = net->ipv6.fib6_null_entry;
1073         } else {
1074                 f6i = rt6_device_match(net, f6i, &fl6->saddr,
1075                                       fl6->flowi6_oif, flags);
1076                 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
1077                         f6i = fib6_multipath_select(net, f6i, fl6,
1078                                                     fl6->flowi6_oif, skb,
1079                                                     flags);
1080         }
1081         if (f6i == net->ipv6.fib6_null_entry) {
1082                 fn = fib6_backtrack(fn, &fl6->saddr);
1083                 if (fn)
1084                         goto restart;
1085         }
1086
1087         trace_fib6_table_lookup(net, f6i, table, fl6);
1088
1089         /* Search through exception table */
1090         rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1091         if (rt) {
1092                 if (ip6_hold_safe(net, &rt, true))
1093                         dst_use_noref(&rt->dst, jiffies);
1094         } else if (f6i == net->ipv6.fib6_null_entry) {
1095                 rt = net->ipv6.ip6_null_entry;
1096                 dst_hold(&rt->dst);
1097         } else {
1098                 rt = ip6_create_rt_rcu(f6i);
1099                 if (!rt) {
1100                         rt = net->ipv6.ip6_null_entry;
1101                         dst_hold(&rt->dst);
1102                 }
1103         }
1104
1105         rcu_read_unlock();
1106
1107         return rt;
1108 }
1109
1110 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1111                                    const struct sk_buff *skb, int flags)
1112 {
1113         return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1114 }
1115 EXPORT_SYMBOL_GPL(ip6_route_lookup);
1116
1117 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1118                             const struct in6_addr *saddr, int oif,
1119                             const struct sk_buff *skb, int strict)
1120 {
1121         struct flowi6 fl6 = {
1122                 .flowi6_oif = oif,
1123                 .daddr = *daddr,
1124         };
1125         struct dst_entry *dst;
1126         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1127
1128         if (saddr) {
1129                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1130                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1131         }
1132
1133         dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1134         if (dst->error == 0)
1135                 return (struct rt6_info *) dst;
1136
1137         dst_release(dst);
1138
1139         return NULL;
1140 }
1141 EXPORT_SYMBOL(rt6_lookup);
1142
1143 /* ip6_ins_rt is called with FREE table->tb6_lock.
1144  * It takes new route entry, the addition fails by any reason the
1145  * route is released.
1146  * Caller must hold dst before calling it.
1147  */
1148
1149 static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1150                         struct netlink_ext_ack *extack)
1151 {
1152         int err;
1153         struct fib6_table *table;
1154
1155         table = rt->fib6_table;
1156         spin_lock_bh(&table->tb6_lock);
1157         err = fib6_add(&table->tb6_root, rt, info, extack);
1158         spin_unlock_bh(&table->tb6_lock);
1159
1160         return err;
1161 }
1162
1163 int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1164 {
1165         struct nl_info info = { .nl_net = net, };
1166
1167         return __ip6_ins_rt(rt, &info, NULL);
1168 }
1169
1170 static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
1171                                            const struct in6_addr *daddr,
1172                                            const struct in6_addr *saddr)
1173 {
1174         struct net_device *dev;
1175         struct rt6_info *rt;
1176
1177         /*
1178          *      Clone the route.
1179          */
1180
1181         if (!fib6_info_hold_safe(ort))
1182                 return NULL;
1183
1184         dev = ip6_rt_get_dev_rcu(ort);
1185         rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1186         if (!rt) {
1187                 fib6_info_release(ort);
1188                 return NULL;
1189         }
1190
1191         ip6_rt_copy_init(rt, ort);
1192         rt->rt6i_flags |= RTF_CACHE;
1193         rt->dst.flags |= DST_HOST;
1194         rt->rt6i_dst.addr = *daddr;
1195         rt->rt6i_dst.plen = 128;
1196
1197         if (!rt6_is_gw_or_nonexthop(ort)) {
1198                 if (ort->fib6_dst.plen != 128 &&
1199                     ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
1200                         rt->rt6i_flags |= RTF_ANYCAST;
1201 #ifdef CONFIG_IPV6_SUBTREES
1202                 if (rt->rt6i_src.plen && saddr) {
1203                         rt->rt6i_src.addr = *saddr;
1204                         rt->rt6i_src.plen = 128;
1205                 }
1206 #endif
1207         }
1208
1209         return rt;
1210 }
1211
1212 static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
1213 {
1214         unsigned short flags = fib6_info_dst_flags(rt);
1215         struct net_device *dev;
1216         struct rt6_info *pcpu_rt;
1217
1218         if (!fib6_info_hold_safe(rt))
1219                 return NULL;
1220
1221         rcu_read_lock();
1222         dev = ip6_rt_get_dev_rcu(rt);
1223         pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
1224         rcu_read_unlock();
1225         if (!pcpu_rt) {
1226                 fib6_info_release(rt);
1227                 return NULL;
1228         }
1229         ip6_rt_copy_init(pcpu_rt, rt);
1230         pcpu_rt->rt6i_flags |= RTF_PCPU;
1231         return pcpu_rt;
1232 }
1233
1234 /* It should be called with rcu_read_lock() acquired */
1235 static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
1236 {
1237         struct rt6_info *pcpu_rt, **p;
1238
1239         p = this_cpu_ptr(rt->rt6i_pcpu);
1240         pcpu_rt = *p;
1241
1242         if (pcpu_rt)
1243                 ip6_hold_safe(NULL, &pcpu_rt, false);
1244
1245         return pcpu_rt;
1246 }
1247
1248 static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1249                                             struct fib6_info *rt)
1250 {
1251         struct rt6_info *pcpu_rt, *prev, **p;
1252
1253         pcpu_rt = ip6_rt_pcpu_alloc(rt);
1254         if (!pcpu_rt) {
1255                 dst_hold(&net->ipv6.ip6_null_entry->dst);
1256                 return net->ipv6.ip6_null_entry;
1257         }
1258
1259         dst_hold(&pcpu_rt->dst);
1260         p = this_cpu_ptr(rt->rt6i_pcpu);
1261         prev = cmpxchg(p, NULL, pcpu_rt);
1262         BUG_ON(prev);
1263
1264         return pcpu_rt;
1265 }
1266
1267 /* exception hash table implementation
1268  */
1269 static DEFINE_SPINLOCK(rt6_exception_lock);
1270
1271 /* Remove rt6_ex from hash table and free the memory
1272  * Caller must hold rt6_exception_lock
1273  */
1274 static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1275                                  struct rt6_exception *rt6_ex)
1276 {
1277         struct fib6_info *from;
1278         struct net *net;
1279
1280         if (!bucket || !rt6_ex)
1281                 return;
1282
1283         net = dev_net(rt6_ex->rt6i->dst.dev);
1284         net->ipv6.rt6_stats->fib_rt_cache--;
1285
1286         /* purge completely the exception to allow releasing the held resources:
1287          * some [sk] cache may keep the dst around for unlimited time
1288          */
1289         from = rcu_dereference_protected(rt6_ex->rt6i->from,
1290                                          lockdep_is_held(&rt6_exception_lock));
1291         rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
1292         fib6_info_release(from);
1293         dst_dev_put(&rt6_ex->rt6i->dst);
1294
1295         hlist_del_rcu(&rt6_ex->hlist);
1296         dst_release(&rt6_ex->rt6i->dst);
1297         kfree_rcu(rt6_ex, rcu);
1298         WARN_ON_ONCE(!bucket->depth);
1299         bucket->depth--;
1300 }
1301
1302 /* Remove oldest rt6_ex in bucket and free the memory
1303  * Caller must hold rt6_exception_lock
1304  */
1305 static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1306 {
1307         struct rt6_exception *rt6_ex, *oldest = NULL;
1308
1309         if (!bucket)
1310                 return;
1311
1312         hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1313                 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1314                         oldest = rt6_ex;
1315         }
1316         rt6_remove_exception(bucket, oldest);
1317 }
1318
1319 static u32 rt6_exception_hash(const struct in6_addr *dst,
1320                               const struct in6_addr *src)
1321 {
1322         static u32 seed __read_mostly;
1323         u32 val;
1324
1325         net_get_random_once(&seed, sizeof(seed));
1326         val = jhash(dst, sizeof(*dst), seed);
1327
1328 #ifdef CONFIG_IPV6_SUBTREES
1329         if (src)
1330                 val = jhash(src, sizeof(*src), val);
1331 #endif
1332         return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1333 }
1334
1335 /* Helper function to find the cached rt in the hash table
1336  * and update bucket pointer to point to the bucket for this
1337  * (daddr, saddr) pair
1338  * Caller must hold rt6_exception_lock
1339  */
1340 static struct rt6_exception *
1341 __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1342                               const struct in6_addr *daddr,
1343                               const struct in6_addr *saddr)
1344 {
1345         struct rt6_exception *rt6_ex;
1346         u32 hval;
1347
1348         if (!(*bucket) || !daddr)
1349                 return NULL;
1350
1351         hval = rt6_exception_hash(daddr, saddr);
1352         *bucket += hval;
1353
1354         hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1355                 struct rt6_info *rt6 = rt6_ex->rt6i;
1356                 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1357
1358 #ifdef CONFIG_IPV6_SUBTREES
1359                 if (matched && saddr)
1360                         matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1361 #endif
1362                 if (matched)
1363                         return rt6_ex;
1364         }
1365         return NULL;
1366 }
1367
1368 /* Helper function to find the cached rt in the hash table
1369  * and update bucket pointer to point to the bucket for this
1370  * (daddr, saddr) pair
1371  * Caller must hold rcu_read_lock()
1372  */
1373 static struct rt6_exception *
1374 __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1375                          const struct in6_addr *daddr,
1376                          const struct in6_addr *saddr)
1377 {
1378         struct rt6_exception *rt6_ex;
1379         u32 hval;
1380
1381         WARN_ON_ONCE(!rcu_read_lock_held());
1382
1383         if (!(*bucket) || !daddr)
1384                 return NULL;
1385
1386         hval = rt6_exception_hash(daddr, saddr);
1387         *bucket += hval;
1388
1389         hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1390                 struct rt6_info *rt6 = rt6_ex->rt6i;
1391                 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1392
1393 #ifdef CONFIG_IPV6_SUBTREES
1394                 if (matched && saddr)
1395                         matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1396 #endif
1397                 if (matched)
1398                         return rt6_ex;
1399         }
1400         return NULL;
1401 }
1402
1403 static unsigned int fib6_mtu(const struct fib6_info *rt)
1404 {
1405         unsigned int mtu;
1406
1407         if (rt->fib6_pmtu) {
1408                 mtu = rt->fib6_pmtu;
1409         } else {
1410                 struct net_device *dev = fib6_info_nh_dev(rt);
1411                 struct inet6_dev *idev;
1412
1413                 rcu_read_lock();
1414                 idev = __in6_dev_get(dev);
1415                 mtu = idev->cnf.mtu6;
1416                 rcu_read_unlock();
1417         }
1418
1419         mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1420
1421         return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1422 }
1423
1424 static int rt6_insert_exception(struct rt6_info *nrt,
1425                                 struct fib6_info *ort)
1426 {
1427         struct net *net = dev_net(nrt->dst.dev);
1428         struct rt6_exception_bucket *bucket;
1429         struct in6_addr *src_key = NULL;
1430         struct rt6_exception *rt6_ex;
1431         int err = 0;
1432
1433         spin_lock_bh(&rt6_exception_lock);
1434
1435         if (ort->exception_bucket_flushed) {
1436                 err = -EINVAL;
1437                 goto out;
1438         }
1439
1440         bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1441                                         lockdep_is_held(&rt6_exception_lock));
1442         if (!bucket) {
1443                 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1444                                  GFP_ATOMIC);
1445                 if (!bucket) {
1446                         err = -ENOMEM;
1447                         goto out;
1448                 }
1449                 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1450         }
1451
1452 #ifdef CONFIG_IPV6_SUBTREES
1453         /* rt6i_src.plen != 0 indicates ort is in subtree
1454          * and exception table is indexed by a hash of
1455          * both rt6i_dst and rt6i_src.
1456          * Otherwise, the exception table is indexed by
1457          * a hash of only rt6i_dst.
1458          */
1459         if (ort->fib6_src.plen)
1460                 src_key = &nrt->rt6i_src.addr;
1461 #endif
1462         /* rt6_mtu_change() might lower mtu on ort.
1463          * Only insert this exception route if its mtu
1464          * is less than ort's mtu value.
1465          */
1466         if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
1467                 err = -EINVAL;
1468                 goto out;
1469         }
1470
1471         rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1472                                                src_key);
1473         if (rt6_ex)
1474                 rt6_remove_exception(bucket, rt6_ex);
1475
1476         rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1477         if (!rt6_ex) {
1478                 err = -ENOMEM;
1479                 goto out;
1480         }
1481         rt6_ex->rt6i = nrt;
1482         rt6_ex->stamp = jiffies;
1483         hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1484         bucket->depth++;
1485         net->ipv6.rt6_stats->fib_rt_cache++;
1486
1487         if (bucket->depth > FIB6_MAX_DEPTH)
1488                 rt6_exception_remove_oldest(bucket);
1489
1490 out:
1491         spin_unlock_bh(&rt6_exception_lock);
1492
1493         /* Update fn->fn_sernum to invalidate all cached dst */
1494         if (!err) {
1495                 spin_lock_bh(&ort->fib6_table->tb6_lock);
1496                 fib6_update_sernum(net, ort);
1497                 spin_unlock_bh(&ort->fib6_table->tb6_lock);
1498                 fib6_force_start_gc(net);
1499         }
1500
1501         return err;
1502 }
1503
1504 void rt6_flush_exceptions(struct fib6_info *rt)
1505 {
1506         struct rt6_exception_bucket *bucket;
1507         struct rt6_exception *rt6_ex;
1508         struct hlist_node *tmp;
1509         int i;
1510
1511         spin_lock_bh(&rt6_exception_lock);
1512         /* Prevent rt6_insert_exception() to recreate the bucket list */
1513         rt->exception_bucket_flushed = 1;
1514
1515         bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1516                                     lockdep_is_held(&rt6_exception_lock));
1517         if (!bucket)
1518                 goto out;
1519
1520         for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1521                 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1522                         rt6_remove_exception(bucket, rt6_ex);
1523                 WARN_ON_ONCE(bucket->depth);
1524                 bucket++;
1525         }
1526
1527 out:
1528         spin_unlock_bh(&rt6_exception_lock);
1529 }
1530
1531 /* Find cached rt in the hash table inside passed in rt
1532  * Caller has to hold rcu_read_lock()
1533  */
1534 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
1535                                            struct in6_addr *daddr,
1536                                            struct in6_addr *saddr)
1537 {
1538         struct rt6_exception_bucket *bucket;
1539         struct in6_addr *src_key = NULL;
1540         struct rt6_exception *rt6_ex;
1541         struct rt6_info *res = NULL;
1542
1543         bucket = rcu_dereference(rt->rt6i_exception_bucket);
1544
1545 #ifdef CONFIG_IPV6_SUBTREES
1546         /* rt6i_src.plen != 0 indicates rt is in subtree
1547          * and exception table is indexed by a hash of
1548          * both rt6i_dst and rt6i_src.
1549          * Otherwise, the exception table is indexed by
1550          * a hash of only rt6i_dst.
1551          */
1552         if (rt->fib6_src.plen)
1553                 src_key = saddr;
1554 #endif
1555         rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1556
1557         if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1558                 res = rt6_ex->rt6i;
1559
1560         return res;
1561 }
1562
1563 /* Remove the passed in cached rt from the hash table that contains it */
1564 static int rt6_remove_exception_rt(struct rt6_info *rt)
1565 {
1566         struct rt6_exception_bucket *bucket;
1567         struct in6_addr *src_key = NULL;
1568         struct rt6_exception *rt6_ex;
1569         struct fib6_info *from;
1570         int err;
1571
1572         from = rcu_dereference(rt->from);
1573         if (!from ||
1574             !(rt->rt6i_flags & RTF_CACHE))
1575                 return -EINVAL;
1576
1577         if (!rcu_access_pointer(from->rt6i_exception_bucket))
1578                 return -ENOENT;
1579
1580         spin_lock_bh(&rt6_exception_lock);
1581         bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1582                                     lockdep_is_held(&rt6_exception_lock));
1583 #ifdef CONFIG_IPV6_SUBTREES
1584         /* rt6i_src.plen != 0 indicates 'from' is in subtree
1585          * and exception table is indexed by a hash of
1586          * both rt6i_dst and rt6i_src.
1587          * Otherwise, the exception table is indexed by
1588          * a hash of only rt6i_dst.
1589          */
1590         if (from->fib6_src.plen)
1591                 src_key = &rt->rt6i_src.addr;
1592 #endif
1593         rt6_ex = __rt6_find_exception_spinlock(&bucket,
1594                                                &rt->rt6i_dst.addr,
1595                                                src_key);
1596         if (rt6_ex) {
1597                 rt6_remove_exception(bucket, rt6_ex);
1598                 err = 0;
1599         } else {
1600                 err = -ENOENT;
1601         }
1602
1603         spin_unlock_bh(&rt6_exception_lock);
1604         return err;
1605 }
1606
1607 /* Find rt6_ex which contains the passed in rt cache and
1608  * refresh its stamp
1609  */
1610 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1611 {
1612         struct rt6_exception_bucket *bucket;
1613         struct in6_addr *src_key = NULL;
1614         struct rt6_exception *rt6_ex;
1615         struct fib6_info *from;
1616
1617         rcu_read_lock();
1618         from = rcu_dereference(rt->from);
1619         if (!from || !(rt->rt6i_flags & RTF_CACHE))
1620                 goto unlock;
1621
1622         bucket = rcu_dereference(from->rt6i_exception_bucket);
1623
1624 #ifdef CONFIG_IPV6_SUBTREES
1625         /* rt6i_src.plen != 0 indicates 'from' is in subtree
1626          * and exception table is indexed by a hash of
1627          * both rt6i_dst and rt6i_src.
1628          * Otherwise, the exception table is indexed by
1629          * a hash of only rt6i_dst.
1630          */
1631         if (from->fib6_src.plen)
1632                 src_key = &rt->rt6i_src.addr;
1633 #endif
1634         rt6_ex = __rt6_find_exception_rcu(&bucket,
1635                                           &rt->rt6i_dst.addr,
1636                                           src_key);
1637         if (rt6_ex)
1638                 rt6_ex->stamp = jiffies;
1639
1640 unlock:
1641         rcu_read_unlock();
1642 }
1643
1644 static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1645                                          struct rt6_info *rt, int mtu)
1646 {
1647         /* If the new MTU is lower than the route PMTU, this new MTU will be the
1648          * lowest MTU in the path: always allow updating the route PMTU to
1649          * reflect PMTU decreases.
1650          *
1651          * If the new MTU is higher, and the route PMTU is equal to the local
1652          * MTU, this means the old MTU is the lowest in the path, so allow
1653          * updating it: if other nodes now have lower MTUs, PMTU discovery will
1654          * handle this.
1655          */
1656
1657         if (dst_mtu(&rt->dst) >= mtu)
1658                 return true;
1659
1660         if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1661                 return true;
1662
1663         return false;
1664 }
1665
1666 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1667                                        struct fib6_info *rt, int mtu)
1668 {
1669         struct rt6_exception_bucket *bucket;
1670         struct rt6_exception *rt6_ex;
1671         int i;
1672
1673         bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1674                                         lockdep_is_held(&rt6_exception_lock));
1675
1676         if (!bucket)
1677                 return;
1678
1679         for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1680                 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1681                         struct rt6_info *entry = rt6_ex->rt6i;
1682
1683                         /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1684                          * route), the metrics of its rt->from have already
1685                          * been updated.
1686                          */
1687                         if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1688                             rt6_mtu_change_route_allowed(idev, entry, mtu))
1689                                 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1690                 }
1691                 bucket++;
1692         }
1693 }
1694
1695 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
1696
1697 static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
1698                                         struct in6_addr *gateway)
1699 {
1700         struct rt6_exception_bucket *bucket;
1701         struct rt6_exception *rt6_ex;
1702         struct hlist_node *tmp;
1703         int i;
1704
1705         if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1706                 return;
1707
1708         spin_lock_bh(&rt6_exception_lock);
1709         bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1710                                      lockdep_is_held(&rt6_exception_lock));
1711
1712         if (bucket) {
1713                 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1714                         hlist_for_each_entry_safe(rt6_ex, tmp,
1715                                                   &bucket->chain, hlist) {
1716                                 struct rt6_info *entry = rt6_ex->rt6i;
1717
1718                                 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1719                                     RTF_CACHE_GATEWAY &&
1720                                     ipv6_addr_equal(gateway,
1721                                                     &entry->rt6i_gateway)) {
1722                                         rt6_remove_exception(bucket, rt6_ex);
1723                                 }
1724                         }
1725                         bucket++;
1726                 }
1727         }
1728
1729         spin_unlock_bh(&rt6_exception_lock);
1730 }
1731
1732 static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1733                                       struct rt6_exception *rt6_ex,
1734                                       struct fib6_gc_args *gc_args,
1735                                       unsigned long now)
1736 {
1737         struct rt6_info *rt = rt6_ex->rt6i;
1738
1739         /* we are pruning and obsoleting aged-out and non gateway exceptions
1740          * even if others have still references to them, so that on next
1741          * dst_check() such references can be dropped.
1742          * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1743          * expired, independently from their aging, as per RFC 8201 section 4
1744          */
1745         if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1746                 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1747                         RT6_TRACE("aging clone %p\n", rt);
1748                         rt6_remove_exception(bucket, rt6_ex);
1749                         return;
1750                 }
1751         } else if (time_after(jiffies, rt->dst.expires)) {
1752                 RT6_TRACE("purging expired route %p\n", rt);
1753                 rt6_remove_exception(bucket, rt6_ex);
1754                 return;
1755         }
1756
1757         if (rt->rt6i_flags & RTF_GATEWAY) {
1758                 struct neighbour *neigh;
1759                 __u8 neigh_flags = 0;
1760
1761                 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1762                 if (neigh)
1763                         neigh_flags = neigh->flags;
1764
1765                 if (!(neigh_flags & NTF_ROUTER)) {
1766                         RT6_TRACE("purging route %p via non-router but gateway\n",
1767                                   rt);
1768                         rt6_remove_exception(bucket, rt6_ex);
1769                         return;
1770                 }
1771         }
1772
1773         gc_args->more++;
1774 }
1775
1776 void rt6_age_exceptions(struct fib6_info *rt,
1777                         struct fib6_gc_args *gc_args,
1778                         unsigned long now)
1779 {
1780         struct rt6_exception_bucket *bucket;
1781         struct rt6_exception *rt6_ex;
1782         struct hlist_node *tmp;
1783         int i;
1784
1785         if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1786                 return;
1787
1788         rcu_read_lock_bh();
1789         spin_lock(&rt6_exception_lock);
1790         bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1791                                     lockdep_is_held(&rt6_exception_lock));
1792
1793         if (bucket) {
1794                 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1795                         hlist_for_each_entry_safe(rt6_ex, tmp,
1796                                                   &bucket->chain, hlist) {
1797                                 rt6_age_examine_exception(bucket, rt6_ex,
1798                                                           gc_args, now);
1799                         }
1800                         bucket++;
1801                 }
1802         }
1803         spin_unlock(&rt6_exception_lock);
1804         rcu_read_unlock_bh();
1805 }
1806
1807 /* must be called with rcu lock held */
1808 struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1809                                     int oif, struct flowi6 *fl6, int strict)
1810 {
1811         struct fib6_node *fn, *saved_fn;
1812         struct fib6_info *f6i;
1813
1814         fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1815         saved_fn = fn;
1816
1817         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1818                 oif = 0;
1819
1820 redo_rt6_select:
1821         f6i = rt6_select(net, fn, oif, strict);
1822         if (f6i == net->ipv6.fib6_null_entry) {
1823                 fn = fib6_backtrack(fn, &fl6->saddr);
1824                 if (fn)
1825                         goto redo_rt6_select;
1826                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1827                         /* also consider unreachable route */
1828                         strict &= ~RT6_LOOKUP_F_REACHABLE;
1829                         fn = saved_fn;
1830                         goto redo_rt6_select;
1831                 }
1832         }
1833
1834         trace_fib6_table_lookup(net, f6i, table, fl6);
1835
1836         return f6i;
1837 }
1838
1839 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1840                                int oif, struct flowi6 *fl6,
1841                                const struct sk_buff *skb, int flags)
1842 {
1843         struct fib6_info *f6i;
1844         struct rt6_info *rt;
1845         int strict = 0;
1846
1847         strict |= flags & RT6_LOOKUP_F_IFACE;
1848         strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1849         if (net->ipv6.devconf_all->forwarding == 0)
1850                 strict |= RT6_LOOKUP_F_REACHABLE;
1851
1852         rcu_read_lock();
1853
1854         f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1855         if (f6i->fib6_nsiblings)
1856                 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1857
1858         if (f6i == net->ipv6.fib6_null_entry) {
1859                 rt = net->ipv6.ip6_null_entry;
1860                 rcu_read_unlock();
1861                 dst_hold(&rt->dst);
1862                 return rt;
1863         }
1864
1865         /*Search through exception table */
1866         rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1867         if (rt) {
1868                 if (ip6_hold_safe(net, &rt, true))
1869                         dst_use_noref(&rt->dst, jiffies);
1870
1871                 rcu_read_unlock();
1872                 return rt;
1873         } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1874                             !(f6i->fib6_flags & RTF_GATEWAY))) {
1875                 /* Create a RTF_CACHE clone which will not be
1876                  * owned by the fib6 tree.  It is for the special case where
1877                  * the daddr in the skb during the neighbor look-up is different
1878                  * from the fl6->daddr used to look-up route here.
1879                  */
1880                 struct rt6_info *uncached_rt;
1881
1882                 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
1883
1884                 rcu_read_unlock();
1885
1886                 if (uncached_rt) {
1887                         /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1888                          * No need for another dst_hold()
1889                          */
1890                         rt6_uncached_list_add(uncached_rt);
1891                         atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1892                 } else {
1893                         uncached_rt = net->ipv6.ip6_null_entry;
1894                         dst_hold(&uncached_rt->dst);
1895                 }
1896
1897                 return uncached_rt;
1898         } else {
1899                 /* Get a percpu copy */
1900
1901                 struct rt6_info *pcpu_rt;
1902
1903                 local_bh_disable();
1904                 pcpu_rt = rt6_get_pcpu_route(f6i);
1905
1906                 if (!pcpu_rt)
1907                         pcpu_rt = rt6_make_pcpu_route(net, f6i);
1908
1909                 local_bh_enable();
1910                 rcu_read_unlock();
1911
1912                 return pcpu_rt;
1913         }
1914 }
1915 EXPORT_SYMBOL_GPL(ip6_pol_route);
1916
1917 static struct rt6_info *ip6_pol_route_input(struct net *net,
1918                                             struct fib6_table *table,
1919                                             struct flowi6 *fl6,
1920                                             const struct sk_buff *skb,
1921                                             int flags)
1922 {
1923         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
1924 }
1925
1926 struct dst_entry *ip6_route_input_lookup(struct net *net,
1927                                          struct net_device *dev,
1928                                          struct flowi6 *fl6,
1929                                          const struct sk_buff *skb,
1930                                          int flags)
1931 {
1932         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1933                 flags |= RT6_LOOKUP_F_IFACE;
1934
1935         return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
1936 }
1937 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1938
1939 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1940                                   struct flow_keys *keys,
1941                                   struct flow_keys *flkeys)
1942 {
1943         const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1944         const struct ipv6hdr *key_iph = outer_iph;
1945         struct flow_keys *_flkeys = flkeys;
1946         const struct ipv6hdr *inner_iph;
1947         const struct icmp6hdr *icmph;
1948         struct ipv6hdr _inner_iph;
1949         struct icmp6hdr _icmph;
1950
1951         if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1952                 goto out;
1953
1954         icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1955                                    sizeof(_icmph), &_icmph);
1956         if (!icmph)
1957                 goto out;
1958
1959         if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1960             icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1961             icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1962             icmph->icmp6_type != ICMPV6_PARAMPROB)
1963                 goto out;
1964
1965         inner_iph = skb_header_pointer(skb,
1966                                        skb_transport_offset(skb) + sizeof(*icmph),
1967                                        sizeof(_inner_iph), &_inner_iph);
1968         if (!inner_iph)
1969                 goto out;
1970
1971         key_iph = inner_iph;
1972         _flkeys = NULL;
1973 out:
1974         if (_flkeys) {
1975                 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1976                 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1977                 keys->tags.flow_label = _flkeys->tags.flow_label;
1978                 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1979         } else {
1980                 keys->addrs.v6addrs.src = key_iph->saddr;
1981                 keys->addrs.v6addrs.dst = key_iph->daddr;
1982                 keys->tags.flow_label = ip6_flowlabel(key_iph);
1983                 keys->basic.ip_proto = key_iph->nexthdr;
1984         }
1985 }
1986
1987 /* if skb is set it will be used and fl6 can be NULL */
1988 u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1989                        const struct sk_buff *skb, struct flow_keys *flkeys)
1990 {
1991         struct flow_keys hash_keys;
1992         u32 mhash;
1993
1994         switch (ip6_multipath_hash_policy(net)) {
1995         case 0:
1996                 memset(&hash_keys, 0, sizeof(hash_keys));
1997                 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1998                 if (skb) {
1999                         ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2000                 } else {
2001                         hash_keys.addrs.v6addrs.src = fl6->saddr;
2002                         hash_keys.addrs.v6addrs.dst = fl6->daddr;
2003                         hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2004                         hash_keys.basic.ip_proto = fl6->flowi6_proto;
2005                 }
2006                 break;
2007         case 1:
2008                 if (skb) {
2009                         unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2010                         struct flow_keys keys;
2011
2012                         /* short-circuit if we already have L4 hash present */
2013                         if (skb->l4_hash)
2014                                 return skb_get_hash_raw(skb) >> 1;
2015
2016                         memset(&hash_keys, 0, sizeof(hash_keys));
2017
2018                         if (!flkeys) {
2019                                 skb_flow_dissect_flow_keys(skb, &keys, flag);
2020                                 flkeys = &keys;
2021                         }
2022                         hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2023                         hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2024                         hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2025                         hash_keys.ports.src = flkeys->ports.src;
2026                         hash_keys.ports.dst = flkeys->ports.dst;
2027                         hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2028                 } else {
2029                         memset(&hash_keys, 0, sizeof(hash_keys));
2030                         hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2031                         hash_keys.addrs.v6addrs.src = fl6->saddr;
2032                         hash_keys.addrs.v6addrs.dst = fl6->daddr;
2033                         hash_keys.ports.src = fl6->fl6_sport;
2034                         hash_keys.ports.dst = fl6->fl6_dport;
2035                         hash_keys.basic.ip_proto = fl6->flowi6_proto;
2036                 }
2037                 break;
2038         }
2039         mhash = flow_hash_from_keys(&hash_keys);
2040
2041         return mhash >> 1;
2042 }
2043
2044 void ip6_route_input(struct sk_buff *skb)
2045 {
2046         const struct ipv6hdr *iph = ipv6_hdr(skb);
2047         struct net *net = dev_net(skb->dev);
2048         int flags = RT6_LOOKUP_F_HAS_SADDR;
2049         struct ip_tunnel_info *tun_info;
2050         struct flowi6 fl6 = {
2051                 .flowi6_iif = skb->dev->ifindex,
2052                 .daddr = iph->daddr,
2053                 .saddr = iph->saddr,
2054                 .flowlabel = ip6_flowinfo(iph),
2055                 .flowi6_mark = skb->mark,
2056                 .flowi6_proto = iph->nexthdr,
2057         };
2058         struct flow_keys *flkeys = NULL, _flkeys;
2059
2060         tun_info = skb_tunnel_info(skb);
2061         if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2062                 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2063
2064         if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2065                 flkeys = &_flkeys;
2066
2067         if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2068                 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2069         skb_dst_drop(skb);
2070         skb_dst_set(skb,
2071                     ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2072 }
2073
2074 static struct rt6_info *ip6_pol_route_output(struct net *net,
2075                                              struct fib6_table *table,
2076                                              struct flowi6 *fl6,
2077                                              const struct sk_buff *skb,
2078                                              int flags)
2079 {
2080         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2081 }
2082
2083 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2084                                          struct flowi6 *fl6, int flags)
2085 {
2086         bool any_src;
2087
2088         if (ipv6_addr_type(&fl6->daddr) &
2089             (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2090                 struct dst_entry *dst;
2091
2092                 dst = l3mdev_link_scope_lookup(net, fl6);
2093                 if (dst)
2094                         return dst;
2095         }
2096
2097         fl6->flowi6_iif = LOOPBACK_IFINDEX;
2098
2099         any_src = ipv6_addr_any(&fl6->saddr);
2100         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2101             (fl6->flowi6_oif && any_src))
2102                 flags |= RT6_LOOKUP_F_IFACE;
2103
2104         if (!any_src)
2105                 flags |= RT6_LOOKUP_F_HAS_SADDR;
2106         else if (sk)
2107                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2108
2109         return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2110 }
2111 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2112
2113 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2114 {
2115         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
2116         struct net_device *loopback_dev = net->loopback_dev;
2117         struct dst_entry *new = NULL;
2118
2119         rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
2120                        DST_OBSOLETE_DEAD, 0);
2121         if (rt) {
2122                 rt6_info_init(rt);
2123                 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2124
2125                 new = &rt->dst;
2126                 new->__use = 1;
2127                 new->input = dst_discard;
2128                 new->output = dst_discard_out;
2129
2130                 dst_copy_metrics(new, &ort->dst);
2131
2132                 rt->rt6i_idev = in6_dev_get(loopback_dev);
2133                 rt->rt6i_gateway = ort->rt6i_gateway;
2134                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
2135
2136                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2137 #ifdef CONFIG_IPV6_SUBTREES
2138                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2139 #endif
2140         }
2141
2142         dst_release(dst_orig);
2143         return new ? new : ERR_PTR(-ENOMEM);
2144 }
2145
2146 /*
2147  *      Destination cache support functions
2148  */
2149
2150 static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2151 {
2152         u32 rt_cookie = 0;
2153
2154         if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2155                 return false;
2156
2157         if (fib6_check_expired(f6i))
2158                 return false;
2159
2160         return true;
2161 }
2162
2163 static struct dst_entry *rt6_check(struct rt6_info *rt,
2164                                    struct fib6_info *from,
2165                                    u32 cookie)
2166 {
2167         u32 rt_cookie = 0;
2168
2169         if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
2170             rt_cookie != cookie)
2171                 return NULL;
2172
2173         if (rt6_check_expired(rt))
2174                 return NULL;
2175
2176         return &rt->dst;
2177 }
2178
2179 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2180                                             struct fib6_info *from,
2181                                             u32 cookie)
2182 {
2183         if (!__rt6_check_expired(rt) &&
2184             rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2185             fib6_check(from, cookie))
2186                 return &rt->dst;
2187         else
2188                 return NULL;
2189 }
2190
2191 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2192 {
2193         struct dst_entry *dst_ret;
2194         struct fib6_info *from;
2195         struct rt6_info *rt;
2196
2197         rt = container_of(dst, struct rt6_info, dst);
2198
2199         rcu_read_lock();
2200
2201         /* All IPV6 dsts are created with ->obsolete set to the value
2202          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2203          * into this function always.
2204          */
2205
2206         from = rcu_dereference(rt->from);
2207
2208         if (from && (rt->rt6i_flags & RTF_PCPU ||
2209             unlikely(!list_empty(&rt->rt6i_uncached))))
2210                 dst_ret = rt6_dst_from_check(rt, from, cookie);
2211         else
2212                 dst_ret = rt6_check(rt, from, cookie);
2213
2214         rcu_read_unlock();
2215
2216         return dst_ret;
2217 }
2218
2219 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2220 {
2221         struct rt6_info *rt = (struct rt6_info *) dst;
2222
2223         if (rt) {
2224                 if (rt->rt6i_flags & RTF_CACHE) {
2225                         rcu_read_lock();
2226                         if (rt6_check_expired(rt)) {
2227                                 rt6_remove_exception_rt(rt);
2228                                 dst = NULL;
2229                         }
2230                         rcu_read_unlock();
2231                 } else {
2232                         dst_release(dst);
2233                         dst = NULL;
2234                 }
2235         }
2236         return dst;
2237 }
2238
2239 static void ip6_link_failure(struct sk_buff *skb)
2240 {
2241         struct rt6_info *rt;
2242
2243         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2244
2245         rt = (struct rt6_info *) skb_dst(skb);
2246         if (rt) {
2247                 rcu_read_lock();
2248                 if (rt->rt6i_flags & RTF_CACHE) {
2249                         rt6_remove_exception_rt(rt);
2250                 } else {
2251                         struct fib6_info *from;
2252                         struct fib6_node *fn;
2253
2254                         from = rcu_dereference(rt->from);
2255                         if (from) {
2256                                 fn = rcu_dereference(from->fib6_node);
2257                                 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2258                                         fn->fn_sernum = -1;
2259                         }
2260                 }
2261                 rcu_read_unlock();
2262         }
2263 }
2264
2265 static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2266 {
2267         if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2268                 struct fib6_info *from;
2269
2270                 rcu_read_lock();
2271                 from = rcu_dereference(rt0->from);
2272                 if (from)
2273                         rt0->dst.expires = from->expires;
2274                 rcu_read_unlock();
2275         }
2276
2277         dst_set_expires(&rt0->dst, timeout);
2278         rt0->rt6i_flags |= RTF_EXPIRES;
2279 }
2280
2281 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2282 {
2283         struct net *net = dev_net(rt->dst.dev);
2284
2285         dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2286         rt->rt6i_flags |= RTF_MODIFIED;
2287         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2288 }
2289
2290 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2291 {
2292         return !(rt->rt6i_flags & RTF_CACHE) &&
2293                 (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
2294 }
2295
2296 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2297                                  const struct ipv6hdr *iph, u32 mtu)
2298 {
2299         const struct in6_addr *daddr, *saddr;
2300         struct rt6_info *rt6 = (struct rt6_info *)dst;
2301
2302         if (dst_metric_locked(dst, RTAX_MTU))
2303                 return;
2304
2305         if (iph) {
2306                 daddr = &iph->daddr;
2307                 saddr = &iph->saddr;
2308         } else if (sk) {
2309                 daddr = &sk->sk_v6_daddr;
2310                 saddr = &inet6_sk(sk)->saddr;
2311         } else {
2312                 daddr = NULL;
2313                 saddr = NULL;
2314         }
2315         dst_confirm_neigh(dst, daddr);
2316         mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2317         if (mtu >= dst_mtu(dst))
2318                 return;
2319
2320         if (!rt6_cache_allowed_for_pmtu(rt6)) {
2321                 rt6_do_update_pmtu(rt6, mtu);
2322                 /* update rt6_ex->stamp for cache */
2323                 if (rt6->rt6i_flags & RTF_CACHE)
2324                         rt6_update_exception_stamp_rt(rt6);
2325         } else if (daddr) {
2326                 struct fib6_info *from;
2327                 struct rt6_info *nrt6;
2328
2329                 rcu_read_lock();
2330                 from = rcu_dereference(rt6->from);
2331                 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
2332                 if (nrt6) {
2333                         rt6_do_update_pmtu(nrt6, mtu);
2334                         if (rt6_insert_exception(nrt6, from))
2335                                 dst_release_immediate(&nrt6->dst);
2336                 }
2337                 rcu_read_unlock();
2338         }
2339 }
2340
2341 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2342                                struct sk_buff *skb, u32 mtu)
2343 {
2344         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2345 }
2346
2347 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2348                      int oif, u32 mark, kuid_t uid)
2349 {
2350         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2351         struct dst_entry *dst;
2352         struct flowi6 fl6 = {
2353                 .flowi6_oif = oif,
2354                 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2355                 .daddr = iph->daddr,
2356                 .saddr = iph->saddr,
2357                 .flowlabel = ip6_flowinfo(iph),
2358                 .flowi6_uid = uid,
2359         };
2360
2361         dst = ip6_route_output(net, NULL, &fl6);
2362         if (!dst->error)
2363                 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
2364         dst_release(dst);
2365 }
2366 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2367
2368 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2369 {
2370         int oif = sk->sk_bound_dev_if;
2371         struct dst_entry *dst;
2372
2373         if (!oif && skb->dev)
2374                 oif = l3mdev_master_ifindex(skb->dev);
2375
2376         ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
2377
2378         dst = __sk_dst_get(sk);
2379         if (!dst || !dst->obsolete ||
2380             dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2381                 return;
2382
2383         bh_lock_sock(sk);
2384         if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2385                 ip6_datagram_dst_update(sk, false);
2386         bh_unlock_sock(sk);
2387 }
2388 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2389
2390 void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2391                            const struct flowi6 *fl6)
2392 {
2393 #ifdef CONFIG_IPV6_SUBTREES
2394         struct ipv6_pinfo *np = inet6_sk(sk);
2395 #endif
2396
2397         ip6_dst_store(sk, dst,
2398                       ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2399                       &sk->sk_v6_daddr : NULL,
2400 #ifdef CONFIG_IPV6_SUBTREES
2401                       ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2402                       &np->saddr :
2403 #endif
2404                       NULL);
2405 }
2406
2407 /* Handle redirects */
2408 struct ip6rd_flowi {
2409         struct flowi6 fl6;
2410         struct in6_addr gateway;
2411 };
2412
2413 static struct rt6_info *__ip6_route_redirect(struct net *net,
2414                                              struct fib6_table *table,
2415                                              struct flowi6 *fl6,
2416                                              const struct sk_buff *skb,
2417                                              int flags)
2418 {
2419         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2420         struct rt6_info *ret = NULL, *rt_cache;
2421         struct fib6_info *rt;
2422         struct fib6_node *fn;
2423
2424         /* Get the "current" route for this destination and
2425          * check if the redirect has come from appropriate router.
2426          *
2427          * RFC 4861 specifies that redirects should only be
2428          * accepted if they come from the nexthop to the target.
2429          * Due to the way the routes are chosen, this notion
2430          * is a bit fuzzy and one might need to check all possible
2431          * routes.
2432          */
2433
2434         rcu_read_lock();
2435         fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2436 restart:
2437         for_each_fib6_node_rt_rcu(fn) {
2438                 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
2439                         continue;
2440                 if (fib6_check_expired(rt))
2441                         continue;
2442                 if (rt->fib6_flags & RTF_REJECT)
2443                         break;
2444                 if (!(rt->fib6_flags & RTF_GATEWAY))
2445                         continue;
2446                 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
2447                         continue;
2448                 /* rt_cache's gateway might be different from its 'parent'
2449                  * in the case of an ip redirect.
2450                  * So we keep searching in the exception table if the gateway
2451                  * is different.
2452                  */
2453                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2454                         rt_cache = rt6_find_cached_rt(rt,
2455                                                       &fl6->daddr,
2456                                                       &fl6->saddr);
2457                         if (rt_cache &&
2458                             ipv6_addr_equal(&rdfl->gateway,
2459                                             &rt_cache->rt6i_gateway)) {
2460                                 ret = rt_cache;
2461                                 break;
2462                         }
2463                         continue;
2464                 }
2465                 break;
2466         }
2467
2468         if (!rt)
2469                 rt = net->ipv6.fib6_null_entry;
2470         else if (rt->fib6_flags & RTF_REJECT) {
2471                 ret = net->ipv6.ip6_null_entry;
2472                 goto out;
2473         }
2474
2475         if (rt == net->ipv6.fib6_null_entry) {
2476                 fn = fib6_backtrack(fn, &fl6->saddr);
2477                 if (fn)
2478                         goto restart;
2479         }
2480
2481 out:
2482         if (ret)
2483                 ip6_hold_safe(net, &ret, true);
2484         else
2485                 ret = ip6_create_rt_rcu(rt);
2486
2487         rcu_read_unlock();
2488
2489         trace_fib6_table_lookup(net, rt, table, fl6);
2490         return ret;
2491 };
2492
2493 static struct dst_entry *ip6_route_redirect(struct net *net,
2494                                             const struct flowi6 *fl6,
2495                                             const struct sk_buff *skb,
2496                                             const struct in6_addr *gateway)
2497 {
2498         int flags = RT6_LOOKUP_F_HAS_SADDR;
2499         struct ip6rd_flowi rdfl;
2500
2501         rdfl.fl6 = *fl6;
2502         rdfl.gateway = *gateway;
2503
2504         return fib6_rule_lookup(net, &rdfl.fl6, skb,
2505                                 flags, __ip6_route_redirect);
2506 }
2507
2508 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2509                   kuid_t uid)
2510 {
2511         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2512         struct dst_entry *dst;
2513         struct flowi6 fl6 = {
2514                 .flowi6_iif = LOOPBACK_IFINDEX,
2515                 .flowi6_oif = oif,
2516                 .flowi6_mark = mark,
2517                 .daddr = iph->daddr,
2518                 .saddr = iph->saddr,
2519                 .flowlabel = ip6_flowinfo(iph),
2520                 .flowi6_uid = uid,
2521         };
2522
2523         dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
2524         rt6_do_redirect(dst, NULL, skb);
2525         dst_release(dst);
2526 }
2527 EXPORT_SYMBOL_GPL(ip6_redirect);
2528
2529 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
2530 {
2531         const struct ipv6hdr *iph = ipv6_hdr(skb);
2532         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2533         struct dst_entry *dst;
2534         struct flowi6 fl6 = {
2535                 .flowi6_iif = LOOPBACK_IFINDEX,
2536                 .flowi6_oif = oif,
2537                 .daddr = msg->dest,
2538                 .saddr = iph->daddr,
2539                 .flowi6_uid = sock_net_uid(net, NULL),
2540         };
2541
2542         dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2543         rt6_do_redirect(dst, NULL, skb);
2544         dst_release(dst);
2545 }
2546
2547 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2548 {
2549         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2550                      sk->sk_uid);
2551 }
2552 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2553
2554 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
2555 {
2556         struct net_device *dev = dst->dev;
2557         unsigned int mtu = dst_mtu(dst);
2558         struct net *net = dev_net(dev);
2559
2560         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2561
2562         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2563                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
2564
2565         /*
2566          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2567          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2568          * IPV6_MAXPLEN is also valid and means: "any MSS,
2569          * rely only on pmtu discovery"
2570          */
2571         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2572                 mtu = IPV6_MAXPLEN;
2573         return mtu;
2574 }
2575
2576 static unsigned int ip6_mtu(const struct dst_entry *dst)
2577 {
2578         struct inet6_dev *idev;
2579         unsigned int mtu;
2580
2581         mtu = dst_metric_raw(dst, RTAX_MTU);
2582         if (mtu)
2583                 goto out;
2584
2585         mtu = IPV6_MIN_MTU;
2586
2587         rcu_read_lock();
2588         idev = __in6_dev_get(dst->dev);
2589         if (idev)
2590                 mtu = idev->cnf.mtu6;
2591         rcu_read_unlock();
2592
2593 out:
2594         mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2595
2596         return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2597 }
2598
2599 /* MTU selection:
2600  * 1. mtu on route is locked - use it
2601  * 2. mtu from nexthop exception
2602  * 3. mtu from egress device
2603  *
2604  * based on ip6_dst_mtu_forward and exception logic of
2605  * rt6_find_cached_rt; called with rcu_read_lock
2606  */
2607 u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2608                       struct in6_addr *saddr)
2609 {
2610         struct rt6_exception_bucket *bucket;
2611         struct rt6_exception *rt6_ex;
2612         struct in6_addr *src_key;
2613         struct inet6_dev *idev;
2614         u32 mtu = 0;
2615
2616         if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2617                 mtu = f6i->fib6_pmtu;
2618                 if (mtu)
2619                         goto out;
2620         }
2621
2622         src_key = NULL;
2623 #ifdef CONFIG_IPV6_SUBTREES
2624         if (f6i->fib6_src.plen)
2625                 src_key = saddr;
2626 #endif
2627
2628         bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2629         rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2630         if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2631                 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2632
2633         if (likely(!mtu)) {
2634                 struct net_device *dev = fib6_info_nh_dev(f6i);
2635
2636                 mtu = IPV6_MIN_MTU;
2637                 idev = __in6_dev_get(dev);
2638                 if (idev && idev->cnf.mtu6 > mtu)
2639                         mtu = idev->cnf.mtu6;
2640         }
2641
2642         mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2643 out:
2644         return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2645 }
2646
2647 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2648                                   struct flowi6 *fl6)
2649 {
2650         struct dst_entry *dst;
2651         struct rt6_info *rt;
2652         struct inet6_dev *idev = in6_dev_get(dev);
2653         struct net *net = dev_net(dev);
2654
2655         if (unlikely(!idev))
2656                 return ERR_PTR(-ENODEV);
2657
2658         rt = ip6_dst_alloc(net, dev, 0);
2659         if (unlikely(!rt)) {
2660                 in6_dev_put(idev);
2661                 dst = ERR_PTR(-ENOMEM);
2662                 goto out;
2663         }
2664
2665         rt->dst.flags |= DST_HOST;
2666         rt->dst.input = ip6_input;
2667         rt->dst.output  = ip6_output;
2668         rt->rt6i_gateway  = fl6->daddr;
2669         rt->rt6i_dst.addr = fl6->daddr;
2670         rt->rt6i_dst.plen = 128;
2671         rt->rt6i_idev     = idev;
2672         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
2673
2674         /* Add this dst into uncached_list so that rt6_disable_ip() can
2675          * do proper release of the net_device
2676          */
2677         rt6_uncached_list_add(rt);
2678         atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2679
2680         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2681
2682 out:
2683         return dst;
2684 }
2685
2686 static int ip6_dst_gc(struct dst_ops *ops)
2687 {
2688         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
2689         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2690         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2691         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2692         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2693         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2694         int entries;
2695
2696         entries = dst_entries_get_fast(ops);
2697         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2698             entries <= rt_max_size)
2699                 goto out;
2700
2701         net->ipv6.ip6_rt_gc_expire++;
2702         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2703         entries = dst_entries_get_slow(ops);
2704         if (entries < ops->gc_thresh)
2705                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
2706 out:
2707         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2708         return entries > rt_max_size;
2709 }
2710
2711 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2712                                             struct fib6_config *cfg,
2713                                             const struct in6_addr *gw_addr,
2714                                             u32 tbid, int flags)
2715 {
2716         struct flowi6 fl6 = {
2717                 .flowi6_oif = cfg->fc_ifindex,
2718                 .daddr = *gw_addr,
2719                 .saddr = cfg->fc_prefsrc,
2720         };
2721         struct fib6_table *table;
2722         struct rt6_info *rt;
2723
2724         table = fib6_get_table(net, tbid);
2725         if (!table)
2726                 return NULL;
2727
2728         if (!ipv6_addr_any(&cfg->fc_prefsrc))
2729                 flags |= RT6_LOOKUP_F_HAS_SADDR;
2730
2731         flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2732         rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
2733
2734         /* if table lookup failed, fall back to full lookup */
2735         if (rt == net->ipv6.ip6_null_entry) {
2736                 ip6_rt_put(rt);
2737                 rt = NULL;
2738         }
2739
2740         return rt;
2741 }
2742
2743 static int ip6_route_check_nh_onlink(struct net *net,
2744                                      struct fib6_config *cfg,
2745                                      const struct net_device *dev,
2746                                      struct netlink_ext_ack *extack)
2747 {
2748         u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2749         const struct in6_addr *gw_addr = &cfg->fc_gateway;
2750         u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2751         struct fib6_info *from;
2752         struct rt6_info *grt;
2753         int err;
2754
2755         err = 0;
2756         grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2757         if (grt) {
2758                 rcu_read_lock();
2759                 from = rcu_dereference(grt->from);
2760                 if (!grt->dst.error &&
2761                     /* ignore match if it is the default route */
2762                     from && !ipv6_addr_any(&from->fib6_dst.addr) &&
2763                     (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
2764                         NL_SET_ERR_MSG(extack,
2765                                        "Nexthop has invalid gateway or device mismatch");
2766                         err = -EINVAL;
2767                 }
2768                 rcu_read_unlock();
2769
2770                 ip6_rt_put(grt);
2771         }
2772
2773         return err;
2774 }
2775
2776 static int ip6_route_check_nh(struct net *net,
2777                               struct fib6_config *cfg,
2778                               struct net_device **_dev,
2779                               struct inet6_dev **idev)
2780 {
2781         const struct in6_addr *gw_addr = &cfg->fc_gateway;
2782         struct net_device *dev = _dev ? *_dev : NULL;
2783         struct rt6_info *grt = NULL;
2784         int err = -EHOSTUNREACH;
2785
2786         if (cfg->fc_table) {
2787                 int flags = RT6_LOOKUP_F_IFACE;
2788
2789                 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2790                                           cfg->fc_table, flags);
2791                 if (grt) {
2792                         if (grt->rt6i_flags & RTF_GATEWAY ||
2793                             (dev && dev != grt->dst.dev)) {
2794                                 ip6_rt_put(grt);
2795                                 grt = NULL;
2796                         }
2797                 }
2798         }
2799
2800         if (!grt)
2801                 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
2802
2803         if (!grt)
2804                 goto out;
2805
2806         if (dev) {
2807                 if (dev != grt->dst.dev) {
2808                         ip6_rt_put(grt);
2809                         goto out;
2810                 }
2811         } else {
2812                 *_dev = dev = grt->dst.dev;
2813                 *idev = grt->rt6i_idev;
2814                 dev_hold(dev);
2815                 in6_dev_hold(grt->rt6i_idev);
2816         }
2817
2818         if (!(grt->rt6i_flags & RTF_GATEWAY))
2819                 err = 0;
2820
2821         ip6_rt_put(grt);
2822
2823 out:
2824         return err;
2825 }
2826
2827 static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2828                            struct net_device **_dev, struct inet6_dev **idev,
2829                            struct netlink_ext_ack *extack)
2830 {
2831         const struct in6_addr *gw_addr = &cfg->fc_gateway;
2832         int gwa_type = ipv6_addr_type(gw_addr);
2833         bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
2834         const struct net_device *dev = *_dev;
2835         bool need_addr_check = !dev;
2836         int err = -EINVAL;
2837
2838         /* if gw_addr is local we will fail to detect this in case
2839          * address is still TENTATIVE (DAD in progress). rt6_lookup()
2840          * will return already-added prefix route via interface that
2841          * prefix route was assigned to, which might be non-loopback.
2842          */
2843         if (dev &&
2844             ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2845                 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2846                 goto out;
2847         }
2848
2849         if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2850                 /* IPv6 strictly inhibits using not link-local
2851                  * addresses as nexthop address.
2852                  * Otherwise, router will not able to send redirects.
2853                  * It is very good, but in some (rare!) circumstances
2854                  * (SIT, PtP, NBMA NOARP links) it is handy to allow
2855                  * some exceptions. --ANK
2856                  * We allow IPv4-mapped nexthops to support RFC4798-type
2857                  * addressing
2858                  */
2859                 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2860                         NL_SET_ERR_MSG(extack, "Invalid gateway address");
2861                         goto out;
2862                 }
2863
2864                 if (cfg->fc_flags & RTNH_F_ONLINK)
2865                         err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2866                 else
2867                         err = ip6_route_check_nh(net, cfg, _dev, idev);
2868
2869                 if (err)
2870                         goto out;
2871         }
2872
2873         /* reload in case device was changed */
2874         dev = *_dev;
2875
2876         err = -EINVAL;
2877         if (!dev) {
2878                 NL_SET_ERR_MSG(extack, "Egress device not specified");
2879                 goto out;
2880         } else if (dev->flags & IFF_LOOPBACK) {
2881                 NL_SET_ERR_MSG(extack,
2882                                "Egress device can not be loopback device for this route");
2883                 goto out;
2884         }
2885
2886         /* if we did not check gw_addr above, do so now that the
2887          * egress device has been resolved.
2888          */
2889         if (need_addr_check &&
2890             ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2891                 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2892                 goto out;
2893         }
2894
2895         err = 0;
2896 out:
2897         return err;
2898 }
2899
/*
 * Build a fib6_info from a route configuration.  The entry is only
 * created and filled in here; nothing in this function inserts it into
 * a FIB table.
 *
 * @cfg:       route configuration.  fc_metric is defaulted to
 *             IP6_RT_PRIO_USER and fc_protocol to RTPROT_BOOT when the
 *             caller left them at 0 / RTPROT_UNSPEC.
 * @gfp_flags: allocation flags handed to fib6_info_alloc()
 * @extack:    extended ack used to report why a configuration was
 *             rejected (not every error path sets a message)
 *
 * Returns the new fib6_info, or an ERR_PTR() on failure.  On success
 * the device pointer is stored in rt->fib6_nh.nh_dev without a
 * dev_put(), while the inet6_dev reference is dropped.  On failure the
 * device, the inet6_dev and the partially built entry are all released.
 */
2900 static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
2901                                               gfp_t gfp_flags,
2902                                               struct netlink_ext_ack *extack)
2903 {
2904         struct net *net = cfg->fc_nlinfo.nl_net;
2905         struct fib6_info *rt = NULL;
2906         struct net_device *dev = NULL;
2907         struct inet6_dev *idev = NULL;
2908         struct fib6_table *table;
2909         int addr_type;
2910         int err = -EINVAL;
2911
2912         /* RTF_PCPU is an internal flag; can not be set by userspace */
2913         if (cfg->fc_flags & RTF_PCPU) {
2914                 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
2915                 goto out;
2916         }
2917
2918         /* RTF_CACHE is an internal flag; can not be set by userspace */
2919         if (cfg->fc_flags & RTF_CACHE) {
2920                 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2921                 goto out;
2922         }
2923
2924         if (cfg->fc_type > RTN_MAX) {
2925                 NL_SET_ERR_MSG(extack, "Invalid route type");
2926                 goto out;
2927         }
2928
2929         if (cfg->fc_dst_len > 128) {
2930                 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2931                 goto out;
2932         }
2933         if (cfg->fc_src_len > 128) {
2934                 NL_SET_ERR_MSG(extack, "Invalid source address length");
2935                 goto out;
2936         }
2937 #ifndef CONFIG_IPV6_SUBTREES
2938         if (cfg->fc_src_len) {
2939                 NL_SET_ERR_MSG(extack,
2940                                "Specifying source address requires IPV6_SUBTREES to be enabled");
2941                 goto out;
2942         }
2943 #endif
             /* Resolve the requested egress device and its inet6_dev up
              * front; whatever was obtained is released at out: on failure.
              */
2944         if (cfg->fc_ifindex) {
2945                 err = -ENODEV;
2946                 dev = dev_get_by_index(net, cfg->fc_ifindex);
2947                 if (!dev)
2948                         goto out;
2949                 idev = in6_dev_get(dev);
2950                 if (!idev)
2951                         goto out;
2952         }
2953
2954         if (cfg->fc_metric == 0)
2955                 cfg->fc_metric = IP6_RT_PRIO_USER;
2956
             /* An onlink nexthop needs an explicit device, and it must be up. */
2957         if (cfg->fc_flags & RTNH_F_ONLINK) {
2958                 if (!dev) {
2959                         NL_SET_ERR_MSG(extack,
2960                                        "Nexthop device required for onlink");
2961                         err = -ENODEV;
2962                         goto out;
2963                 }
2964
2965                 if (!(dev->flags & IFF_UP)) {
2966                         NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2967                         err = -ENETDOWN;
2968                         goto out;
2969                 }
2970         }
2971
             /* Without NLM_F_CREATE an existing table is looked up first;
              * if there is none, a warning is logged and the table is
              * created anyway.  -ENOBUFS is returned if no table results.
              */
2972         err = -ENOBUFS;
2973         if (cfg->fc_nlinfo.nlh &&
2974             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
2975                 table = fib6_get_table(net, cfg->fc_table);
2976                 if (!table) {
2977                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
2978                         table = fib6_new_table(net, cfg->fc_table);
2979                 }
2980         } else {
2981                 table = fib6_new_table(net, cfg->fc_table);
2982         }
2983
2984         if (!table)
2985                 goto out;
2986
2987         err = -ENOMEM;
2988         rt = fib6_info_alloc(gfp_flags);
2989         if (!rt)
2990                 goto out;
2991
2992         rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
2993                                                extack);
2994         if (IS_ERR(rt->fib6_metrics)) {
2995                 err = PTR_ERR(rt->fib6_metrics);
2996                 /* Do not leave garbage there. */
2997                 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
2998                 goto out;
2999         }
3000
3001         if (cfg->fc_flags & RTF_ADDRCONF)
3002                 rt->dst_nocount = true;
3003
             /* fc_expires is converted from clock_t to jiffies and is
              * relative to now.
              */
3004         if (cfg->fc_flags & RTF_EXPIRES)
3005                 fib6_set_expires(rt, jiffies +
3006                                 clock_t_to_jiffies(cfg->fc_expires));
3007         else
3008                 fib6_clean_expires(rt);
3009
3010         if (cfg->fc_protocol == RTPROT_UNSPEC)
3011                 cfg->fc_protocol = RTPROT_BOOT;
3012         rt->fib6_protocol = cfg->fc_protocol;
3013
             /* destination address type, used by the loopback check below */
3014         addr_type = ipv6_addr_type(&cfg->fc_dst);
3015
3016         if (cfg->fc_encap) {
3017                 struct lwtunnel_state *lwtstate;
3018
3019                 err = lwtunnel_build_state(cfg->fc_encap_type,
3020                                            cfg->fc_encap, AF_INET6, cfg,
3021                                            &lwtstate, extack);
3022                 if (err)
3023                         goto out;
3024                 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
3025         }
3026
3027         ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3028         rt->fib6_dst.plen = cfg->fc_dst_len;
3029         if (rt->fib6_dst.plen == 128)
3030                 rt->dst_host = true;
3031
3032 #ifdef CONFIG_IPV6_SUBTREES
3033         ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3034         rt->fib6_src.plen = cfg->fc_src_len;
3035 #endif
3036
3037         rt->fib6_metric = cfg->fc_metric;
3038         rt->fib6_nh.nh_weight = 1;
3039
3040         rt->fib6_type = cfg->fc_type;
3041
3042         /* We cannot add true routes via loopback here,
3043            they would result in kernel looping; promote them to reject routes
3044          */
3045         if ((cfg->fc_flags & RTF_REJECT) ||
3046             (dev && (dev->flags & IFF_LOOPBACK) &&
3047              !(addr_type & IPV6_ADDR_LOOPBACK) &&
3048              !(cfg->fc_flags & RTF_LOCAL))) {
3049                 /* hold loopback dev/idev if we haven't done so. */
3050                 if (dev != net->loopback_dev) {
3051                         if (dev) {
3052                                 dev_put(dev);
3053                                 in6_dev_put(idev);
3054                         }
3055                         dev = net->loopback_dev;
3056                         dev_hold(dev);
3057                         idev = in6_dev_get(dev);
3058                         if (!idev) {
3059                                 err = -ENODEV;
3060                                 goto out;
3061                         }
3062                 }
                     /* reject routes skip the gateway, device-state and
                      * preferred-source checks below
                      */
3063                 rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
3064                 goto install_route;
3065         }
3066
             /* ip6_validate_gw() takes &dev/&idev and may fill them in
              * when no device was given in the configuration.
              */
3067         if (cfg->fc_flags & RTF_GATEWAY) {
3068                 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3069                 if (err)
3070                         goto out;
3071
3072                 rt->fib6_nh.nh_gw = cfg->fc_gateway;
3073         }
3074
3075         err = -ENODEV;
3076         if (!dev)
3077                 goto out;
3078
3079         if (idev->cnf.disable_ipv6) {
3080                 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3081                 err = -EACCES;
3082                 goto out;
3083         }
3084
3085         if (!(dev->flags & IFF_UP)) {
3086                 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3087                 err = -ENETDOWN;
3088                 goto out;
3089         }
3090
             /* a preferred source address must pass ipv6_chk_addr() on dev */
3091         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3092                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3093                         NL_SET_ERR_MSG(extack, "Invalid source address");
3094                         err = -EINVAL;
3095                         goto out;
3096                 }
3097                 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3098                 rt->fib6_prefsrc.plen = 128;
3099         } else
3100                 rt->fib6_prefsrc.plen = 0;
3101
3102         rt->fib6_flags = cfg->fc_flags;
3103
3104 install_route:
             /* mark the nexthop link-down when the device has no carrier,
              * except for local and anycast routes
              */
3105         if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3106             !netif_carrier_ok(dev))
3107                 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3108         rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
3109         rt->fib6_nh.nh_dev = dev;
3110         rt->fib6_table = table;
3111
             /* the inet6_dev is not stored in rt; drop its reference */
3112         if (idev)
3113                 in6_dev_put(idev);
3114
3115         return rt;
3116 out:
3117         if (dev)
3118                 dev_put(dev);
3119         if (idev)
3120                 in6_dev_put(idev);
3121
             /* rt can still be NULL here when an early check failed */
3122         fib6_info_release(rt);
3123         return ERR_PTR(err);
3124 }
3125
3126 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3127                   struct netlink_ext_ack *extack)
3128 {
3129         struct fib6_info *rt;
3130         int err;
3131
3132         rt = ip6_route_info_create(cfg, gfp_flags, extack);
3133         if (IS_ERR(rt))
3134                 return PTR_ERR(rt);
3135
3136         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3137         fib6_info_release(rt);
3138
3139         return err;
3140 }
3141
3142 static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3143 {
3144         struct net *net = info->nl_net;
3145         struct fib6_table *table;
3146         int err;
3147
3148         if (rt == net->ipv6.fib6_null_entry) {
3149                 err = -ENOENT;
3150                 goto out;
3151         }
3152
3153         table = rt->fib6_table;
3154         spin_lock_bh(&table->tb6_lock);
3155         err = fib6_del(rt, info);
3156         spin_unlock_bh(&table->tb6_lock);
3157
3158 out:
3159         fib6_info_release(rt);
3160         return err;
3161 }
3162
3163 int ip6_del_rt(struct net *net, struct fib6_info *rt)
3164 {
3165         struct nl_info info = { .nl_net = net };
3166
3167         return __ip6_del_rt(rt, &info);
3168 }
3169
3170 static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3171 {
3172         struct nl_info *info = &cfg->fc_nlinfo;
3173         struct net *net = info->nl_net;
3174         struct sk_buff *skb = NULL;
3175         struct fib6_table *table;
3176         int err = -ENOENT;
3177
3178         if (rt == net->ipv6.fib6_null_entry)
3179                 goto out_put;
3180         table = rt->fib6_table;
3181         spin_lock_bh(&table->tb6_lock);
3182
3183         if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3184                 struct fib6_info *sibling, *next_sibling;
3185
3186                 /* prefer to send a single notification with all hops */
3187                 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3188                 if (skb) {
3189                         u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3190
3191                         if (rt6_fill_node(net, skb, rt, NULL,
3192                                           NULL, NULL, 0, RTM_DELROUTE,
3193                                           info->portid, seq, 0) < 0) {
3194                                 kfree_skb(skb);
3195                                 skb = NULL;
3196                         } else
3197                                 info->skip_notify = 1;
3198                 }
3199
3200                 list_for_each_entry_safe(sibling, next_sibling,
3201                                          &rt->fib6_siblings,
3202                                          fib6_siblings) {
3203                         err = fib6_del(sibling, info);
3204                         if (err)
3205                                 goto out_unlock;
3206                 }
3207         }
3208
3209         err = fib6_del(rt, info);
3210 out_unlock:
3211         spin_unlock_bh(&table->tb6_lock);
3212 out_put:
3213         fib6_info_release(rt);
3214
3215         if (skb) {
3216                 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3217                             info->nlh, gfp_any());
3218         }
3219         return err;
3220 }
3221
3222 static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3223 {
3224         int rc = -ESRCH;
3225
3226         if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3227                 goto out;
3228
3229         if (cfg->fc_flags & RTF_GATEWAY &&
3230             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3231                 goto out;
3232
3233         rc = rt6_remove_exception_rt(rt);
3234 out:
3235         return rc;
3236 }
3237
3238 static int ip6_route_del(struct fib6_config *cfg,
3239                          struct netlink_ext_ack *extack)
3240 {
3241         struct rt6_info *rt_cache;
3242         struct fib6_table *table;
3243         struct fib6_info *rt;
3244         struct fib6_node *fn;
3245         int err = -ESRCH;
3246
3247         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3248         if (!table) {
3249                 NL_SET_ERR_MSG(extack, "FIB table does not exist");
3250                 return err;
3251         }
3252
3253         rcu_read_lock();
3254
3255         fn = fib6_locate(&table->tb6_root,
3256                          &cfg->fc_dst, cfg->fc_dst_len,
3257                          &cfg->fc_src, cfg->fc_src_len,
3258                          !(cfg->fc_flags & RTF_CACHE));
3259
3260         if (fn) {
3261                 for_each_fib6_node_rt_rcu(fn) {
3262                         if (cfg->fc_flags & RTF_CACHE) {
3263                                 int rc;
3264
3265                                 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3266                                                               &cfg->fc_src);
3267                                 if (rt_cache) {
3268                                         rc = ip6_del_cached_rt(rt_cache, cfg);
3269                                         if (rc != -ESRCH) {
3270                                                 rcu_read_unlock();
3271                                                 return rc;
3272                                         }
3273                                 }
3274                                 continue;
3275                         }
3276                         if (cfg->fc_ifindex &&
3277                             (!rt->fib6_nh.nh_dev ||
3278                              rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
3279                                 continue;
3280                         if (cfg->fc_flags & RTF_GATEWAY &&
3281                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
3282                                 continue;
3283                         if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
3284                                 continue;
3285                         if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3286                                 continue;
3287                         if (!fib6_info_hold_safe(rt))
3288                                 continue;
3289                         rcu_read_unlock();
3290
3291                         /* if gateway was specified only delete the one hop */
3292                         if (cfg->fc_flags & RTF_GATEWAY)
3293                                 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3294
3295                         return __ip6_del_rt_siblings(rt, cfg);
3296                 }
3297         }
3298         rcu_read_unlock();
3299
3300         return err;
3301 }
3302
3303 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3304 {
3305         struct netevent_redirect netevent;
3306         struct rt6_info *rt, *nrt = NULL;
3307         struct ndisc_options ndopts;
3308         struct inet6_dev *in6_dev;
3309         struct neighbour *neigh;
3310         struct fib6_info *from;
3311         struct rd_msg *msg;
3312         int optlen, on_link;
3313         u8 *lladdr;
3314
3315         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
3316         optlen -= sizeof(*msg);
3317
3318         if (optlen < 0) {
3319                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3320                 return;
3321         }
3322
3323         msg = (struct rd_msg *)icmp6_hdr(skb);
3324
3325         if (ipv6_addr_is_multicast(&msg->dest)) {
3326                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3327                 return;
3328         }
3329
3330         on_link = 0;
3331         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3332                 on_link = 1;
3333         } else if (ipv6_addr_type(&msg->target) !=
3334                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
3335                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3336                 return;
3337         }
3338
3339         in6_dev = __in6_dev_get(skb->dev);
3340         if (!in6_dev)
3341                 return;
3342         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3343                 return;
3344
3345         /* RFC2461 8.1:
3346          *      The IP source address of the Redirect MUST be the same as the current
3347          *      first-hop router for the specified ICMP Destination Address.
3348          */
3349
3350         if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3351                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3352                 return;
3353         }
3354
3355         lladdr = NULL;
3356         if (ndopts.nd_opts_tgt_lladdr) {
3357                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3358                                              skb->dev);
3359                 if (!lladdr) {
3360                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3361                         return;
3362                 }
3363         }
3364
3365         rt = (struct rt6_info *) dst;
3366         if (rt->rt6i_flags & RTF_REJECT) {
3367                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
3368                 return;
3369         }
3370
3371         /* Redirect received -> path was valid.
3372          * Look, redirects are sent only in response to data packets,
3373          * so that this nexthop apparently is reachable. --ANK
3374          */
3375         dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
3376
3377         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3378         if (!neigh)
3379                 return;
3380
3381         /*
3382          *      We have finally decided to accept it.
3383          */
3384
3385         ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
3386                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
3387                      NEIGH_UPDATE_F_OVERRIDE|
3388                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3389                                      NEIGH_UPDATE_F_ISROUTER)),
3390                      NDISC_REDIRECT, &ndopts);
3391
3392         rcu_read_lock();
3393         from = rcu_dereference(rt->from);
3394         /* This fib6_info_hold() is safe here because we hold reference to rt
3395          * and rt already holds reference to fib6_info.
3396          */
3397         fib6_info_hold(from);
3398         rcu_read_unlock();
3399
3400         nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
3401         if (!nrt)
3402                 goto out;
3403
3404         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3405         if (on_link)
3406                 nrt->rt6i_flags &= ~RTF_GATEWAY;
3407
3408         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
3409
3410         /* No need to remove rt from the exception table if rt is
3411          * a cached route because rt6_insert_exception() will
3412          * takes care of it
3413          */
3414         if (rt6_insert_exception(nrt, from)) {
3415                 dst_release_immediate(&nrt->dst);
3416                 goto out;
3417         }
3418
3419         netevent.old = &rt->dst;
3420         netevent.new = &nrt->dst;
3421         netevent.daddr = &msg->dest;
3422         netevent.neigh = neigh;
3423         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3424
3425 out:
3426         fib6_info_release(from);
3427         neigh_release(neigh);
3428 }
3429
3430 #ifdef CONFIG_IPV6_ROUTE_INFO
3431 static struct fib6_info *rt6_get_route_info(struct net *net,
3432                                            const struct in6_addr *prefix, int prefixlen,
3433                                            const struct in6_addr *gwaddr,
3434                                            struct net_device *dev)
3435 {
3436         u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3437         int ifindex = dev->ifindex;
3438         struct fib6_node *fn;
3439         struct fib6_info *rt = NULL;
3440         struct fib6_table *table;
3441
3442         table = fib6_get_table(net, tb_id);
3443         if (!table)
3444                 return NULL;
3445
3446         rcu_read_lock();
3447         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
3448         if (!fn)
3449                 goto out;
3450
3451         for_each_fib6_node_rt_rcu(fn) {
3452                 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
3453                         continue;
3454                 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3455                         continue;
3456                 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
3457                         continue;
3458                 if (!fib6_info_hold_safe(rt))
3459                         continue;
3460                 break;
3461         }
3462 out:
3463         rcu_read_unlock();
3464         return rt;
3465 }
3466
3467 static struct fib6_info *rt6_add_route_info(struct net *net,
3468                                            const struct in6_addr *prefix, int prefixlen,
3469                                            const struct in6_addr *gwaddr,
3470                                            struct net_device *dev,
3471                                            unsigned int pref)
3472 {
3473         struct fib6_config cfg = {
3474                 .fc_metric      = IP6_RT_PRIO_USER,
3475                 .fc_ifindex     = dev->ifindex,
3476                 .fc_dst_len     = prefixlen,
3477                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3478                                   RTF_UP | RTF_PREF(pref),
3479                 .fc_protocol = RTPROT_RA,
3480                 .fc_type = RTN_UNICAST,
3481                 .fc_nlinfo.portid = 0,
3482                 .fc_nlinfo.nlh = NULL,
3483                 .fc_nlinfo.nl_net = net,
3484         };
3485
3486         cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
3487         cfg.fc_dst = *prefix;
3488         cfg.fc_gateway = *gwaddr;
3489
3490         /* We should treat it as a default route if prefix length is 0. */
3491         if (!prefixlen)
3492                 cfg.fc_flags |= RTF_DEFAULT;
3493
3494         ip6_route_add(&cfg, GFP_ATOMIC, NULL);
3495
3496         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
3497 }
3498 #endif
3499
3500 struct fib6_info *rt6_get_dflt_router(struct net *net,
3501                                      const struct in6_addr *addr,
3502                                      struct net_device *dev)
3503 {
3504         u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
3505         struct fib6_info *rt;
3506         struct fib6_table *table;
3507
3508         table = fib6_get_table(net, tb_id);
3509         if (!table)
3510                 return NULL;
3511
3512         rcu_read_lock();
3513         for_each_fib6_node_rt_rcu(&table->tb6_root) {
3514                 if (dev == rt->fib6_nh.nh_dev &&
3515                     ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3516                     ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
3517                         break;
3518         }
3519         if (rt && !fib6_info_hold_safe(rt))
3520                 rt = NULL;
3521         rcu_read_unlock();
3522         return rt;
3523 }
3524
3525 struct fib6_info *rt6_add_dflt_router(struct net *net,
3526                                      const struct in6_addr *gwaddr,
3527                                      struct net_device *dev,
3528                                      unsigned int pref)
3529 {
3530         struct fib6_config cfg = {
3531                 .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3532                 .fc_metric      = IP6_RT_PRIO_USER,
3533                 .fc_ifindex     = dev->ifindex,
3534                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3535                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3536                 .fc_protocol = RTPROT_RA,
3537                 .fc_type = RTN_UNICAST,
3538                 .fc_nlinfo.portid = 0,
3539                 .fc_nlinfo.nlh = NULL,
3540                 .fc_nlinfo.nl_net = net,
3541         };
3542
3543         cfg.fc_gateway = *gwaddr;
3544
3545         if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3546                 struct fib6_table *table;
3547
3548                 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3549                 if (table)
3550                         table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3551         }
3552
3553         return rt6_get_dflt_router(net, gwaddr, dev);
3554 }
3555
3556 static void __rt6_purge_dflt_routers(struct net *net,
3557                                      struct fib6_table *table)
3558 {
3559         struct fib6_info *rt;
3560
3561 restart:
3562         rcu_read_lock();
3563         for_each_fib6_node_rt_rcu(&table->tb6_root) {
3564                 struct net_device *dev = fib6_info_nh_dev(rt);
3565                 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3566
3567                 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3568                     (!idev || idev->cnf.accept_ra != 2) &&
3569                     fib6_info_hold_safe(rt)) {
3570                         rcu_read_unlock();
3571                         ip6_del_rt(net, rt);
3572                         goto restart;
3573                 }
3574         }
3575         rcu_read_unlock();
3576
3577         table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3578 }
3579
3580 void rt6_purge_dflt_routers(struct net *net)
3581 {
3582         struct fib6_table *table;
3583         struct hlist_head *head;
3584         unsigned int h;
3585
3586         rcu_read_lock();
3587
3588         for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3589                 head = &net->ipv6.fib_table_hash[h];
3590                 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3591                         if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3592                                 __rt6_purge_dflt_routers(net, table);
3593                 }
3594         }
3595
3596         rcu_read_unlock();
3597 }
3598
3599 static void rtmsg_to_fib6_config(struct net *net,
3600                                  struct in6_rtmsg *rtmsg,
3601                                  struct fib6_config *cfg)
3602 {
3603         *cfg = (struct fib6_config){
3604                 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3605                          : RT6_TABLE_MAIN,
3606                 .fc_ifindex = rtmsg->rtmsg_ifindex,
3607                 .fc_metric = rtmsg->rtmsg_metric,
3608                 .fc_expires = rtmsg->rtmsg_info,
3609                 .fc_dst_len = rtmsg->rtmsg_dst_len,
3610                 .fc_src_len = rtmsg->rtmsg_src_len,
3611                 .fc_flags = rtmsg->rtmsg_flags,
3612                 .fc_type = rtmsg->rtmsg_type,
3613
3614                 .fc_nlinfo.nl_net = net,
3615
3616                 .fc_dst = rtmsg->rtmsg_dst,
3617                 .fc_src = rtmsg->rtmsg_src,
3618                 .fc_gateway = rtmsg->rtmsg_gateway,
3619         };
3620 }
3621
3622 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3623 {
3624         struct fib6_config cfg;
3625         struct in6_rtmsg rtmsg;
3626         int err;
3627
3628         switch (cmd) {
3629         case SIOCADDRT:         /* Add a route */
3630         case SIOCDELRT:         /* Delete a route */
3631                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3632                         return -EPERM;
3633                 err = copy_from_user(&rtmsg, arg,
3634                                      sizeof(struct in6_rtmsg));
3635                 if (err)
3636                         return -EFAULT;
3637
3638                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
3639
3640                 rtnl_lock();
3641                 switch (cmd) {
3642                 case SIOCADDRT:
3643                         err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
3644                         break;
3645                 case SIOCDELRT:
3646                         err = ip6_route_del(&cfg, NULL);
3647                         break;
3648                 default:
3649                         err = -EINVAL;
3650                 }
3651                 rtnl_unlock();
3652
3653                 return err;
3654         }
3655
3656         return -EINVAL;
3657 }
3658
3659 /*
3660  *      Drop the packet on the floor
3661  */
3662
3663 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
3664 {
3665         int type;
3666         struct dst_entry *dst = skb_dst(skb);
3667         switch (ipstats_mib_noroutes) {
3668         case IPSTATS_MIB_INNOROUTES:
3669                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
3670                 if (type == IPV6_ADDR_ANY) {
3671                         IP6_INC_STATS(dev_net(dst->dev),
3672                                       __in6_dev_get_safely(skb->dev),
3673                                       IPSTATS_MIB_INADDRERRORS);
3674                         break;
3675                 }
3676                 /* FALLTHROUGH */
3677         case IPSTATS_MIB_OUTNOROUTES:
3678                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3679                               ipstats_mib_noroutes);
3680                 break;
3681         }
3682         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
3683         kfree_skb(skb);
3684         return 0;
3685 }
3686
3687 static int ip6_pkt_discard(struct sk_buff *skb)
3688 {
3689         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
3690 }
3691
3692 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3693 {
3694         skb->dev = skb_dst(skb)->dev;
3695         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
3696 }
3697
3698 static int ip6_pkt_prohibit(struct sk_buff *skb)
3699 {
3700         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
3701 }
3702
3703 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3704 {
3705         skb->dev = skb_dst(skb)->dev;
3706         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
3707 }
3708
3709 /*
3710  *      Allocate a dst for local (unicast / anycast) address.
3711  */
3712
3713 struct fib6_info *addrconf_f6i_alloc(struct net *net,
3714                                      struct inet6_dev *idev,
3715                                      const struct in6_addr *addr,
3716                                      bool anycast, gfp_t gfp_flags)
3717 {
3718         u32 tb_id;
3719         struct net_device *dev = idev->dev;
3720         struct fib6_info *f6i;
3721
3722         f6i = fib6_info_alloc(gfp_flags);
3723         if (!f6i)
3724                 return ERR_PTR(-ENOMEM);
3725
3726         f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0, NULL);
3727         f6i->dst_nocount = true;
3728         f6i->dst_host = true;
3729         f6i->fib6_protocol = RTPROT_KERNEL;
3730         f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
3731         if (anycast) {
3732                 f6i->fib6_type = RTN_ANYCAST;
3733                 f6i->fib6_flags |= RTF_ANYCAST;
3734         } else {
3735                 f6i->fib6_type = RTN_LOCAL;
3736                 f6i->fib6_flags |= RTF_LOCAL;
3737         }
3738
3739         f6i->fib6_nh.nh_gw = *addr;
3740         dev_hold(dev);
3741         f6i->fib6_nh.nh_dev = dev;
3742         f6i->fib6_dst.addr = *addr;
3743         f6i->fib6_dst.plen = 128;
3744         tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3745         f6i->fib6_table = fib6_get_table(net, tb_id);
3746
3747         return f6i;
3748 }
3749
/* remove deleted ip from prefsrc entries
 *
 * Argument block handed to fib6_remove_prefsrc() through fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device; NULL matches any */
	struct net *net;	/* namespace whose fib6_null_entry is skipped */
	struct in6_addr *addr;	/* address compared against fib6_prefsrc.addr */
};
3756
3757 static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3758 {
3759         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3760         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3761         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3762
3763         if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
3764             rt != net->ipv6.fib6_null_entry &&
3765             ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
3766                 spin_lock_bh(&rt6_exception_lock);
3767                 /* remove prefsrc entry */
3768                 rt->fib6_prefsrc.plen = 0;
3769                 spin_unlock_bh(&rt6_exception_lock);
3770         }
3771         return 0;
3772 }
3773
3774 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3775 {
3776         struct net *net = dev_net(ifp->idev->dev);
3777         struct arg_dev_net_ip adni = {
3778                 .dev = ifp->idev->dev,
3779                 .net = net,
3780                 .addr = &ifp->addr,
3781         };
3782         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3783 }
3784
/* Flag set that fib6_clean_tohost() requires in full before it treats a
 * route as pointing at a router.
 */
#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3786
3787 /* Remove routers and update dst entries when gateway turn into host. */
3788 static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3789 {
3790         struct in6_addr *gateway = (struct in6_addr *)arg;
3791
3792         if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3793             ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
3794                 return -1;
3795         }
3796
3797         /* Further clean up cached routes in exception table.
3798          * This is needed because cached route may have a different
3799          * gateway than its 'parent' in the case of an ip redirect.
3800          */
3801         rt6_exceptions_clean_tohost(rt, gateway);
3802
3803         return 0;
3804 }
3805
/* Apply fib6_clean_tohost() to every route in @net for @gateway. */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
3810
/* Argument block shared by the fib6_ifup() and fib6_ifdown() walkers. */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the walk is about */
	union {
		unsigned int nh_flags;	/* fib6_ifup(): nexthop flags to clear */
		unsigned long event;	/* fib6_ifdown(): NETDEV_* event */
	};
};
3818
3819 static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3820 {
3821         struct fib6_info *iter;
3822         struct fib6_node *fn;
3823
3824         fn = rcu_dereference_protected(rt->fib6_node,
3825                         lockdep_is_held(&rt->fib6_table->tb6_lock));
3826         iter = rcu_dereference_protected(fn->leaf,
3827                         lockdep_is_held(&rt->fib6_table->tb6_lock));
3828         while (iter) {
3829                 if (iter->fib6_metric == rt->fib6_metric &&
3830                     rt6_qualify_for_ecmp(iter))
3831                         return iter;
3832                 iter = rcu_dereference_protected(iter->fib6_next,
3833                                 lockdep_is_held(&rt->fib6_table->tb6_lock));
3834         }
3835
3836         return NULL;
3837 }
3838
3839 static bool rt6_is_dead(const struct fib6_info *rt)
3840 {
3841         if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3842             (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
3843              fib6_ignore_linkdown(rt)))
3844                 return true;
3845
3846         return false;
3847 }
3848
3849 static int rt6_multipath_total_weight(const struct fib6_info *rt)
3850 {
3851         struct fib6_info *iter;
3852         int total = 0;
3853
3854         if (!rt6_is_dead(rt))
3855                 total += rt->fib6_nh.nh_weight;
3856
3857         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3858                 if (!rt6_is_dead(iter))
3859                         total += iter->fib6_nh.nh_weight;
3860         }
3861
3862         return total;
3863 }
3864
3865 static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3866 {
3867         int upper_bound = -1;
3868
3869         if (!rt6_is_dead(rt)) {
3870                 *weight += rt->fib6_nh.nh_weight;
3871                 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3872                                                     total) - 1;
3873         }
3874         atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
3875 }
3876
3877 static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3878 {
3879         struct fib6_info *iter;
3880         int weight = 0;
3881
3882         rt6_upper_bound_set(rt, &weight, total);
3883
3884         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3885                 rt6_upper_bound_set(iter, &weight, total);
3886 }
3887
3888 void rt6_multipath_rebalance(struct fib6_info *rt)
3889 {
3890         struct fib6_info *first;
3891         int total;
3892
3893         /* In case the entire multipath route was marked for flushing,
3894          * then there is no need to rebalance upon the removal of every
3895          * sibling route.
3896          */
3897         if (!rt->fib6_nsiblings || rt->should_flush)
3898                 return;
3899
3900         /* During lookup routes are evaluated in order, so we need to
3901          * make sure upper bounds are assigned from the first sibling
3902          * onwards.
3903          */
3904         first = rt6_multipath_first_sibling(rt);
3905         if (WARN_ON_ONCE(!first))
3906                 return;
3907
3908         total = rt6_multipath_total_weight(first);
3909         rt6_multipath_upper_bound_set(first, total);
3910 }
3911
3912 static int fib6_ifup(struct fib6_info *rt, void *p_arg)
3913 {
3914         const struct arg_netdev_event *arg = p_arg;
3915         struct net *net = dev_net(arg->dev);
3916
3917         if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
3918                 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
3919                 fib6_update_sernum_upto_root(net, rt);
3920                 rt6_multipath_rebalance(rt);
3921         }
3922
3923         return 0;
3924 }
3925
3926 void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3927 {
3928         struct arg_netdev_event arg = {
3929                 .dev = dev,
3930                 {
3931                         .nh_flags = nh_flags,
3932                 },
3933         };
3934
3935         if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3936                 arg.nh_flags |= RTNH_F_LINKDOWN;
3937
3938         fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3939 }
3940
3941 static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
3942                                    const struct net_device *dev)
3943 {
3944         struct fib6_info *iter;
3945
3946         if (rt->fib6_nh.nh_dev == dev)
3947                 return true;
3948         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3949                 if (iter->fib6_nh.nh_dev == dev)
3950                         return true;
3951
3952         return false;
3953 }
3954
3955 static void rt6_multipath_flush(struct fib6_info *rt)
3956 {
3957         struct fib6_info *iter;
3958
3959         rt->should_flush = 1;
3960         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3961                 iter->should_flush = 1;
3962 }
3963
3964 static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
3965                                              const struct net_device *down_dev)
3966 {
3967         struct fib6_info *iter;
3968         unsigned int dead = 0;
3969
3970         if (rt->fib6_nh.nh_dev == down_dev ||
3971             rt->fib6_nh.nh_flags & RTNH_F_DEAD)
3972                 dead++;
3973         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3974                 if (iter->fib6_nh.nh_dev == down_dev ||
3975                     iter->fib6_nh.nh_flags & RTNH_F_DEAD)
3976                         dead++;
3977
3978         return dead;
3979 }
3980
3981 static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
3982                                        const struct net_device *dev,
3983                                        unsigned int nh_flags)
3984 {
3985         struct fib6_info *iter;
3986
3987         if (rt->fib6_nh.nh_dev == dev)
3988                 rt->fib6_nh.nh_flags |= nh_flags;
3989         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3990                 if (iter->fib6_nh.nh_dev == dev)
3991                         iter->fib6_nh.nh_flags |= nh_flags;
3992 }
3993
3994 /* called with write lock held for table with rt */
3995 static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
3996 {
3997         const struct arg_netdev_event *arg = p_arg;
3998         const struct net_device *dev = arg->dev;
3999         struct net *net = dev_net(dev);
4000
4001         if (rt == net->ipv6.fib6_null_entry)
4002                 return 0;
4003
4004         switch (arg->event) {
4005         case NETDEV_UNREGISTER:
4006                 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
4007         case NETDEV_DOWN:
4008                 if (rt->should_flush)
4009                         return -1;
4010                 if (!rt->fib6_nsiblings)
4011                         return rt->fib6_nh.nh_dev == dev ? -1 : 0;
4012                 if (rt6_multipath_uses_dev(rt, dev)) {
4013                         unsigned int count;
4014
4015                         count = rt6_multipath_dead_count(rt, dev);
4016                         if (rt->fib6_nsiblings + 1 == count) {
4017                                 rt6_multipath_flush(rt);
4018                                 return -1;
4019                         }
4020                         rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4021                                                    RTNH_F_LINKDOWN);
4022                         fib6_update_sernum(net, rt);
4023                         rt6_multipath_rebalance(rt);
4024                 }
4025                 return -2;
4026         case NETDEV_CHANGE:
4027                 if (rt->fib6_nh.nh_dev != dev ||
4028                     rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4029                         break;
4030                 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
4031                 rt6_multipath_rebalance(rt);
4032                 break;
4033         }
4034
4035         return 0;
4036 }
4037
4038 void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4039 {
4040         struct arg_netdev_event arg = {
4041                 .dev = dev,
4042                 {
4043                         .event = event,
4044                 },
4045         };
4046         struct net *net = dev_net(dev);
4047
4048         if (net->ipv6.sysctl.skip_notify_on_dev_down)
4049                 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4050         else
4051                 fib6_clean_all(net, fib6_ifdown, &arg);
4052 }
4053
4054 void rt6_disable_ip(struct net_device *dev, unsigned long event)
4055 {
4056         rt6_sync_down_dev(dev, event);
4057         rt6_uncached_list_flush_dev(dev_net(dev), dev);
4058         neigh_ifdown(&nd_tbl, dev);
4059 }
4060
/* Argument block handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* only routes on this device are updated */
	unsigned int mtu;	/* MTU value to apply */
};
4065
4066 static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
4067 {
4068         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4069         struct inet6_dev *idev;
4070
4071         /* In IPv6 pmtu discovery is not optional,
4072            so that RTAX_MTU lock cannot disable it.
4073            We still use this lock to block changes
4074            caused by addrconf/ndisc.
4075         */
4076
4077         idev = __in6_dev_get(arg->dev);
4078         if (!idev)
4079                 return 0;
4080
4081         /* For administrative MTU increase, there is no way to discover
4082            IPv6 PMTU increase, so PMTU increase should be updated here.
4083            Since RFC 1981 doesn't include administrative MTU increase
4084            update PMTU increase is a MUST. (i.e. jumbo frame)
4085          */
4086         if (rt->fib6_nh.nh_dev == arg->dev &&
4087             !fib6_metric_locked(rt, RTAX_MTU)) {
4088                 u32 mtu = rt->fib6_pmtu;
4089
4090                 if (mtu >= arg->mtu ||
4091                     (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4092                         fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4093
4094                 spin_lock_bh(&rt6_exception_lock);
4095                 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
4096                 spin_unlock_bh(&rt6_exception_lock);
4097         }
4098         return 0;
4099 }
4100
4101 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4102 {
4103         struct rt6_mtu_change_arg arg = {
4104                 .dev = dev,
4105                 .mtu = mtu,
4106         };
4107
4108         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4109 }
4110
/* Attribute policy passed to nlmsg_parse() by rtm_to_fib6_config().
 * Attributes without an entry here (RTA_DST, RTA_SRC and RTA_VIA are read
 * by that function) get no type or length constraint from this table.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_PREFSRC]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]               = { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
	[RTA_ENCAP]             = { .type = NLA_NESTED },
	[RTA_EXPIRES]           = { .type = NLA_U32 },
	[RTA_UID]               = { .type = NLA_U32 },
	[RTA_MARK]              = { .type = NLA_U32 },
	[RTA_TABLE]             = { .type = NLA_U32 },
	[RTA_IP_PROTO]          = { .type = NLA_U8 },
	[RTA_SPORT]             = { .type = NLA_U16 },
	[RTA_DPORT]             = { .type = NLA_U16 },
};
4130
4131 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4132                               struct fib6_config *cfg,
4133                               struct netlink_ext_ack *extack)
4134 {
4135         struct rtmsg *rtm;
4136         struct nlattr *tb[RTA_MAX+1];
4137         unsigned int pref;
4138         int err;
4139
4140         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4141                           extack);
4142         if (err < 0)
4143                 goto errout;
4144
4145         err = -EINVAL;
4146         rtm = nlmsg_data(nlh);
4147
4148         *cfg = (struct fib6_config){
4149                 .fc_table = rtm->rtm_table,
4150                 .fc_dst_len = rtm->rtm_dst_len,
4151                 .fc_src_len = rtm->rtm_src_len,
4152                 .fc_flags = RTF_UP,
4153                 .fc_protocol = rtm->rtm_protocol,
4154                 .fc_type = rtm->rtm_type,
4155
4156                 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4157                 .fc_nlinfo.nlh = nlh,
4158                 .fc_nlinfo.nl_net = sock_net(skb->sk),
4159         };
4160
4161         if (rtm->rtm_type == RTN_UNREACHABLE ||
4162             rtm->rtm_type == RTN_BLACKHOLE ||
4163             rtm->rtm_type == RTN_PROHIBIT ||
4164             rtm->rtm_type == RTN_THROW)
4165                 cfg->fc_flags |= RTF_REJECT;
4166
4167         if (rtm->rtm_type == RTN_LOCAL)
4168                 cfg->fc_flags |= RTF_LOCAL;
4169
4170         if (rtm->rtm_flags & RTM_F_CLONED)
4171                 cfg->fc_flags |= RTF_CACHE;
4172
4173         cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4174
4175         if (tb[RTA_GATEWAY]) {
4176                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
4177                 cfg->fc_flags |= RTF_GATEWAY;
4178         }
4179         if (tb[RTA_VIA]) {
4180                 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4181                 goto errout;
4182         }
4183
4184         if (tb[RTA_DST]) {
4185                 int plen = (rtm->rtm_dst_len + 7) >> 3;
4186
4187                 if (nla_len(tb[RTA_DST]) < plen)
4188                         goto errout;
4189
4190                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
4191         }
4192
4193         if (tb[RTA_SRC]) {
4194                 int plen = (rtm->rtm_src_len + 7) >> 3;
4195
4196                 if (nla_len(tb[RTA_SRC]) < plen)
4197                         goto errout;
4198
4199                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
4200         }
4201
4202         if (tb[RTA_PREFSRC])
4203                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4204
4205         if (tb[RTA_OIF])
4206                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4207
4208         if (tb[RTA_PRIORITY])
4209                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4210
4211         if (tb[RTA_METRICS]) {
4212                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4213                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
4214         }
4215
4216         if (tb[RTA_TABLE])
4217                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4218
4219         if (tb[RTA_MULTIPATH]) {
4220                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4221                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
4222
4223                 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4224                                                      cfg->fc_mp_len, extack);
4225                 if (err < 0)
4226                         goto errout;
4227         }
4228
4229         if (tb[RTA_PREF]) {
4230                 pref = nla_get_u8(tb[RTA_PREF]);
4231                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4232                     pref != ICMPV6_ROUTER_PREF_HIGH)
4233                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
4234                 cfg->fc_flags |= RTF_PREF(pref);
4235         }
4236
4237         if (tb[RTA_ENCAP])
4238                 cfg->fc_encap = tb[RTA_ENCAP];
4239
4240         if (tb[RTA_ENCAP_TYPE]) {
4241                 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4242
4243                 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
4244                 if (err < 0)
4245                         goto errout;
4246         }
4247
4248         if (tb[RTA_EXPIRES]) {
4249                 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4250
4251                 if (addrconf_finite_timeout(timeout)) {
4252                         cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4253                         cfg->fc_flags |= RTF_EXPIRES;
4254                 }
4255         }
4256
4257         err = 0;
4258 errout:
4259         return err;
4260 }
4261
4262 struct rt6_nh {
4263         struct fib6_info *fib6_info;
4264         struct fib6_config r_cfg;
4265         struct list_head next;
4266 };
4267
4268 static int ip6_route_info_append(struct net *net,
4269                                  struct list_head *rt6_nh_list,
4270                                  struct fib6_info *rt,
4271                                  struct fib6_config *r_cfg)
4272 {
4273         struct rt6_nh *nh;
4274         int err = -EEXIST;
4275
4276         list_for_each_entry(nh, rt6_nh_list, next) {
4277                 /* check if fib6_info already exists */
4278                 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
4279                         return err;
4280         }
4281
4282         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4283         if (!nh)
4284                 return -ENOMEM;
4285         nh->fib6_info = rt;
4286         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4287         list_add_tail(&nh->next, rt6_nh_list);
4288
4289         return 0;
4290 }
4291
4292 static void ip6_route_mpath_notify(struct fib6_info *rt,
4293                                    struct fib6_info *rt_last,
4294                                    struct nl_info *info,
4295                                    __u16 nlflags)
4296 {
4297         /* if this is an APPEND route, then rt points to the first route
4298          * inserted and rt_last points to last route inserted. Userspace
4299          * wants a consistent dump of the route which starts at the first
4300          * nexthop. Since sibling routes are always added at the end of
4301          * the list, find the first sibling of the last route appended
4302          */
4303         if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4304                 rt = list_first_entry(&rt_last->fib6_siblings,
4305                                       struct fib6_info,
4306                                       fib6_siblings);
4307         }
4308
4309         if (rt)
4310                 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4311 }
4312
4313 static int ip6_route_multipath_add(struct fib6_config *cfg,
4314                                    struct netlink_ext_ack *extack)
4315 {
4316         struct fib6_info *rt_notif = NULL, *rt_last = NULL;
4317         struct nl_info *info = &cfg->fc_nlinfo;
4318         struct fib6_config r_cfg;
4319         struct rtnexthop *rtnh;
4320         struct fib6_info *rt;
4321         struct rt6_nh *err_nh;
4322         struct rt6_nh *nh, *nh_safe;
4323         __u16 nlflags;
4324         int remaining;
4325         int attrlen;
4326         int err = 1;
4327         int nhn = 0;
4328         int replace = (cfg->fc_nlinfo.nlh &&
4329                        (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4330         LIST_HEAD(rt6_nh_list);
4331
4332         nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4333         if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4334                 nlflags |= NLM_F_APPEND;
4335
4336         remaining = cfg->fc_mp_len;
4337         rtnh = (struct rtnexthop *)cfg->fc_mp;
4338
4339         /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4340          * fib6_info structs per nexthop
4341          */
4342         while (rtnh_ok(rtnh, remaining)) {
4343                 memcpy(&r_cfg, cfg, sizeof(*cfg));
4344                 if (rtnh->rtnh_ifindex)
4345                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4346
4347                 attrlen = rtnh_attrlen(rtnh);
4348                 if (attrlen > 0) {
4349                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4350
4351                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4352                         if (nla) {
4353                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
4354                                 r_cfg.fc_flags |= RTF_GATEWAY;
4355                         }
4356                         r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4357                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4358                         if (nla)
4359                                 r_cfg.fc_encap_type = nla_get_u16(nla);
4360                 }
4361
4362                 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4363                 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
4364                 if (IS_ERR(rt)) {
4365                         err = PTR_ERR(rt);
4366                         rt = NULL;
4367                         goto cleanup;
4368                 }
4369                 if (!rt6_qualify_for_ecmp(rt)) {
4370                         err = -EINVAL;
4371                         NL_SET_ERR_MSG(extack,
4372                                        "Device only routes can not be added for IPv6 using the multipath API.");
4373                         fib6_info_release(rt);
4374                         goto cleanup;
4375                 }
4376
4377                 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
4378
4379                 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4380                                             rt, &r_cfg);
4381                 if (err) {
4382                         fib6_info_release(rt);
4383                         goto cleanup;
4384                 }
4385
4386                 rtnh = rtnh_next(rtnh, &remaining);
4387         }
4388
4389         /* for add and replace send one notification with all nexthops.
4390          * Skip the notification in fib6_add_rt2node and send one with
4391          * the full route when done
4392          */
4393         info->skip_notify = 1;
4394
4395         err_nh = NULL;
4396         list_for_each_entry(nh, &rt6_nh_list, next) {
4397                 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4398                 fib6_info_release(nh->fib6_info);
4399
4400                 if (!err) {
4401                         /* save reference to last route successfully inserted */
4402                         rt_last = nh->fib6_info;
4403
4404                         /* save reference to first route for notification */
4405                         if (!rt_notif)
4406                                 rt_notif = nh->fib6_info;
4407                 }
4408
4409                 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4410                 nh->fib6_info = NULL;
4411                 if (err) {
4412                         if (replace && nhn)
4413                                 NL_SET_ERR_MSG_MOD(extack,
4414                                                    "multipath route replace failed (check consistency of installed routes)");
4415                         err_nh = nh;
4416                         goto add_errout;
4417                 }
4418
4419                 /* Because each route is added like a single route we remove
4420                  * these flags after the first nexthop: if there is a collision,
4421                  * we have already failed to add the first nexthop:
4422                  * fib6_add_rt2node() has rejected it; when replacing, old
4423                  * nexthops have been replaced by first new, the rest should
4424                  * be added to it.
4425                  */
4426                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4427                                                      NLM_F_REPLACE);
4428                 nhn++;
4429         }
4430
4431         /* success ... tell user about new route */
4432         ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4433         goto cleanup;
4434
4435 add_errout:
4436         /* send notification for routes that were added so that
4437          * the delete notifications sent by ip6_route_del are
4438          * coherent
4439          */
4440         if (rt_notif)
4441                 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4442
4443         /* Delete routes that were already added */
4444         list_for_each_entry(nh, &rt6_nh_list, next) {
4445                 if (err_nh == nh)
4446                         break;
4447                 ip6_route_del(&nh->r_cfg, extack);
4448         }
4449
4450 cleanup:
4451         list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
4452                 if (nh->fib6_info)
4453                         fib6_info_release(nh->fib6_info);
4454                 list_del(&nh->next);
4455                 kfree(nh);
4456         }
4457
4458         return err;
4459 }
4460
4461 static int ip6_route_multipath_del(struct fib6_config *cfg,
4462                                    struct netlink_ext_ack *extack)
4463 {
4464         struct fib6_config r_cfg;
4465         struct rtnexthop *rtnh;
4466         int remaining;
4467         int attrlen;
4468         int err = 1, last_err = 0;
4469
4470         remaining = cfg->fc_mp_len;
4471         rtnh = (struct rtnexthop *)cfg->fc_mp;
4472
4473         /* Parse a Multipath Entry */
4474         while (rtnh_ok(rtnh, remaining)) {
4475                 memcpy(&r_cfg, cfg, sizeof(*cfg));
4476                 if (rtnh->rtnh_ifindex)
4477                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4478
4479                 attrlen = rtnh_attrlen(rtnh);
4480                 if (attrlen > 0) {
4481                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4482
4483                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4484                         if (nla) {
4485                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4486                                 r_cfg.fc_flags |= RTF_GATEWAY;
4487                         }
4488                 }
4489                 err = ip6_route_del(&r_cfg, extack);
4490                 if (err)
4491                         last_err = err;
4492
4493                 rtnh = rtnh_next(rtnh, &remaining);
4494         }
4495
4496         return last_err;
4497 }
4498
4499 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4500                               struct netlink_ext_ack *extack)
4501 {
4502         struct fib6_config cfg;
4503         int err;
4504
4505         err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4506         if (err < 0)
4507                 return err;
4508
4509         if (cfg.fc_mp)
4510                 return ip6_route_multipath_del(&cfg, extack);
4511         else {
4512                 cfg.fc_delete_all_nh = 1;
4513                 return ip6_route_del(&cfg, extack);
4514         }
4515 }
4516
4517 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4518                               struct netlink_ext_ack *extack)
4519 {
4520         struct fib6_config cfg;
4521         int err;
4522
4523         err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4524         if (err < 0)
4525                 return err;
4526
4527         if (cfg.fc_mp)
4528                 return ip6_route_multipath_add(&cfg, extack);
4529         else
4530                 return ip6_route_add(&cfg, GFP_KERNEL, extack);
4531 }
4532
4533 static size_t rt6_nlmsg_size(struct fib6_info *rt)
4534 {
4535         int nexthop_len = 0;
4536
4537         if (rt->fib6_nsiblings) {
4538                 nexthop_len = nla_total_size(0)  /* RTA_MULTIPATH */
4539                             + NLA_ALIGN(sizeof(struct rtnexthop))
4540                             + nla_total_size(16) /* RTA_GATEWAY */
4541                             + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
4542
4543                 nexthop_len *= rt->fib6_nsiblings;
4544         }
4545
4546         return NLMSG_ALIGN(sizeof(struct rtmsg))
4547                + nla_total_size(16) /* RTA_SRC */
4548                + nla_total_size(16) /* RTA_DST */
4549                + nla_total_size(16) /* RTA_GATEWAY */
4550                + nla_total_size(16) /* RTA_PREFSRC */
4551                + nla_total_size(4) /* RTA_TABLE */
4552                + nla_total_size(4) /* RTA_IIF */
4553                + nla_total_size(4) /* RTA_OIF */
4554                + nla_total_size(4) /* RTA_PRIORITY */
4555                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4556                + nla_total_size(sizeof(struct rta_cacheinfo))
4557                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
4558                + nla_total_size(1) /* RTA_PREF */
4559                + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
4560                + nexthop_len;
4561 }
4562
4563 static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
4564                             unsigned int *flags, bool skip_oif)
4565 {
4566         if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
4567                 *flags |= RTNH_F_DEAD;
4568
4569         if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
4570                 *flags |= RTNH_F_LINKDOWN;
4571
4572                 rcu_read_lock();
4573                 if (fib6_ignore_linkdown(rt))
4574                         *flags |= RTNH_F_DEAD;
4575                 rcu_read_unlock();
4576         }
4577
4578         if (rt->fib6_flags & RTF_GATEWAY) {
4579                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
4580                         goto nla_put_failure;
4581         }
4582
4583         *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4584         if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
4585                 *flags |= RTNH_F_OFFLOAD;
4586
4587         /* not needed for multipath encoding b/c it has a rtnexthop struct */
4588         if (!skip_oif && rt->fib6_nh.nh_dev &&
4589             nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
4590                 goto nla_put_failure;
4591
4592         if (rt->fib6_nh.nh_lwtstate &&
4593             lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
4594                 goto nla_put_failure;
4595
4596         return 0;
4597
4598 nla_put_failure:
4599         return -EMSGSIZE;
4600 }
4601
4602 /* add multipath next hop */
4603 static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
4604 {
4605         const struct net_device *dev = rt->fib6_nh.nh_dev;
4606         struct rtnexthop *rtnh;
4607         unsigned int flags = 0;
4608
4609         rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4610         if (!rtnh)
4611                 goto nla_put_failure;
4612
4613         rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4614         rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
4615
4616         if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
4617                 goto nla_put_failure;
4618
4619         rtnh->rtnh_flags = flags;
4620
4621         /* length of rtnetlink header + attributes */
4622         rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4623
4624         return 0;
4625
4626 nla_put_failure:
4627         return -EMSGSIZE;
4628 }
4629
4630 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4631                          struct fib6_info *rt, struct dst_entry *dst,
4632                          struct in6_addr *dest, struct in6_addr *src,
4633                          int iif, int type, u32 portid, u32 seq,
4634                          unsigned int flags)
4635 {
4636         struct rt6_info *rt6 = (struct rt6_info *)dst;
4637         struct rt6key *rt6_dst, *rt6_src;
4638         u32 *pmetrics, table, rt6_flags;
4639         struct nlmsghdr *nlh;
4640         struct rtmsg *rtm;
4641         long expires = 0;
4642
4643         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
4644         if (!nlh)
4645                 return -EMSGSIZE;
4646
4647         if (rt6) {
4648                 rt6_dst = &rt6->rt6i_dst;
4649                 rt6_src = &rt6->rt6i_src;
4650                 rt6_flags = rt6->rt6i_flags;
4651         } else {
4652                 rt6_dst = &rt->fib6_dst;
4653                 rt6_src = &rt->fib6_src;
4654                 rt6_flags = rt->fib6_flags;
4655         }
4656
4657         rtm = nlmsg_data(nlh);
4658         rtm->rtm_family = AF_INET6;
4659         rtm->rtm_dst_len = rt6_dst->plen;
4660         rtm->rtm_src_len = rt6_src->plen;
4661         rtm->rtm_tos = 0;
4662         if (rt->fib6_table)
4663                 table = rt->fib6_table->tb6_id;
4664         else
4665                 table = RT6_TABLE_UNSPEC;
4666         rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
4667         if (nla_put_u32(skb, RTA_TABLE, table))
4668                 goto nla_put_failure;
4669
4670         rtm->rtm_type = rt->fib6_type;
4671         rtm->rtm_flags = 0;
4672         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4673         rtm->rtm_protocol = rt->fib6_protocol;
4674
4675         if (rt6_flags & RTF_CACHE)
4676                 rtm->rtm_flags |= RTM_F_CLONED;
4677
4678         if (dest) {
4679                 if (nla_put_in6_addr(skb, RTA_DST, dest))
4680                         goto nla_put_failure;
4681                 rtm->rtm_dst_len = 128;
4682         } else if (rtm->rtm_dst_len)
4683                 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
4684                         goto nla_put_failure;
4685 #ifdef CONFIG_IPV6_SUBTREES
4686         if (src) {
4687                 if (nla_put_in6_addr(skb, RTA_SRC, src))
4688                         goto nla_put_failure;
4689                 rtm->rtm_src_len = 128;
4690         } else if (rtm->rtm_src_len &&
4691                    nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
4692                 goto nla_put_failure;
4693 #endif
4694         if (iif) {
4695 #ifdef CONFIG_IPV6_MROUTE
4696                 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
4697                         int err = ip6mr_get_route(net, skb, rtm, portid);
4698
4699                         if (err == 0)
4700                                 return 0;
4701                         if (err < 0)
4702                                 goto nla_put_failure;
4703                 } else
4704 #endif
4705                         if (nla_put_u32(skb, RTA_IIF, iif))
4706                                 goto nla_put_failure;
4707         } else if (dest) {
4708                 struct in6_addr saddr_buf;
4709                 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4710                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4711                         goto nla_put_failure;
4712         }
4713
4714         if (rt->fib6_prefsrc.plen) {
4715                 struct in6_addr saddr_buf;
4716                 saddr_buf = rt->fib6_prefsrc.addr;
4717                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4718                         goto nla_put_failure;
4719         }
4720
4721         pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4722         if (rtnetlink_put_metrics(skb, pmetrics) < 0)
4723                 goto nla_put_failure;
4724
4725         if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4726                 goto nla_put_failure;
4727
4728         /* For multipath routes, walk the siblings list and add
4729          * each as a nexthop within RTA_MULTIPATH.
4730          */
4731         if (rt6) {
4732                 if (rt6_flags & RTF_GATEWAY &&
4733                     nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4734                         goto nla_put_failure;
4735
4736                 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4737                         goto nla_put_failure;
4738         } else if (rt->fib6_nsiblings) {
4739                 struct fib6_info *sibling, *next_sibling;
4740                 struct nlattr *mp;
4741
4742                 mp = nla_nest_start(skb, RTA_MULTIPATH);
4743                 if (!mp)
4744                         goto nla_put_failure;
4745
4746                 if (rt6_add_nexthop(skb, rt) < 0)
4747                         goto nla_put_failure;
4748
4749                 list_for_each_entry_safe(sibling, next_sibling,
4750                                          &rt->fib6_siblings, fib6_siblings) {
4751                         if (rt6_add_nexthop(skb, sibling) < 0)
4752                                 goto nla_put_failure;
4753                 }
4754
4755                 nla_nest_end(skb, mp);
4756         } else {
4757                 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
4758                         goto nla_put_failure;
4759         }
4760
4761         if (rt6_flags & RTF_EXPIRES) {
4762                 expires = dst ? dst->expires : rt->expires;
4763                 expires -= jiffies;
4764         }
4765
4766         if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4767                 goto nla_put_failure;
4768
4769         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
4770                 goto nla_put_failure;
4771
4772
4773         nlmsg_end(skb, nlh);
4774         return 0;
4775
4776 nla_put_failure:
4777         nlmsg_cancel(skb, nlh);
4778         return -EMSGSIZE;
4779 }
4780
4781 static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4782                                const struct net_device *dev)
4783 {
4784         if (f6i->fib6_nh.nh_dev == dev)
4785                 return true;
4786
4787         if (f6i->fib6_nsiblings) {
4788                 struct fib6_info *sibling, *next_sibling;
4789
4790                 list_for_each_entry_safe(sibling, next_sibling,
4791                                          &f6i->fib6_siblings, fib6_siblings) {
4792                         if (sibling->fib6_nh.nh_dev == dev)
4793                                 return true;
4794                 }
4795         }
4796
4797         return false;
4798 }
4799
4800 int rt6_dump_route(struct fib6_info *rt, void *p_arg)
4801 {
4802         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
4803         struct fib_dump_filter *filter = &arg->filter;
4804         unsigned int flags = NLM_F_MULTI;
4805         struct net *net = arg->net;
4806
4807         if (rt == net->ipv6.fib6_null_entry)
4808                 return 0;
4809
4810         if ((filter->flags & RTM_F_PREFIX) &&
4811             !(rt->fib6_flags & RTF_PREFIX_RT)) {
4812                 /* success since this is not a prefix route */
4813                 return 1;
4814         }
4815         if (filter->filter_set) {
4816                 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4817                     (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4818                     (filter->protocol && rt->fib6_protocol != filter->protocol)) {
4819                         return 1;
4820                 }
4821                 flags |= NLM_F_DUMP_FILTERED;
4822         }
4823
4824         return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4825                              RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4826                              arg->cb->nlh->nlmsg_seq, flags);
4827 }
4828
4829 static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
4830                                         const struct nlmsghdr *nlh,
4831                                         struct nlattr **tb,
4832                                         struct netlink_ext_ack *extack)
4833 {
4834         struct rtmsg *rtm;
4835         int i, err;
4836
4837         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
4838                 NL_SET_ERR_MSG_MOD(extack,
4839                                    "Invalid header for get route request");
4840                 return -EINVAL;
4841         }
4842
4843         if (!netlink_strict_get_check(skb))
4844                 return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
4845                                    rtm_ipv6_policy, extack);
4846
4847         rtm = nlmsg_data(nlh);
4848         if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
4849             (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
4850             rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
4851             rtm->rtm_type) {
4852                 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
4853                 return -EINVAL;
4854         }
4855         if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
4856                 NL_SET_ERR_MSG_MOD(extack,
4857                                    "Invalid flags for get route request");
4858                 return -EINVAL;
4859         }
4860
4861         err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
4862                                  rtm_ipv6_policy, extack);
4863         if (err)
4864                 return err;
4865
4866         if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
4867             (tb[RTA_DST] && !rtm->rtm_dst_len)) {
4868                 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
4869                 return -EINVAL;
4870         }
4871
4872         for (i = 0; i <= RTA_MAX; i++) {
4873                 if (!tb[i])
4874                         continue;
4875
4876                 switch (i) {
4877                 case RTA_SRC:
4878                 case RTA_DST:
4879                 case RTA_IIF:
4880                 case RTA_OIF:
4881                 case RTA_MARK:
4882                 case RTA_UID:
4883                 case RTA_SPORT:
4884                 case RTA_DPORT:
4885                 case RTA_IP_PROTO:
4886                         break;
4887                 default:
4888                         NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
4889                         return -EINVAL;
4890                 }
4891         }
4892
4893         return 0;
4894 }
4895
4896 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4897                               struct netlink_ext_ack *extack)
4898 {
4899         struct net *net = sock_net(in_skb->sk);
4900         struct nlattr *tb[RTA_MAX+1];
4901         int err, iif = 0, oif = 0;
4902         struct fib6_info *from;
4903         struct dst_entry *dst;
4904         struct rt6_info *rt;
4905         struct sk_buff *skb;
4906         struct rtmsg *rtm;
4907         struct flowi6 fl6 = {};
4908         bool fibmatch;
4909
4910         err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
4911         if (err < 0)
4912                 goto errout;
4913
4914         err = -EINVAL;
4915         rtm = nlmsg_data(nlh);
4916         fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
4917         fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4918
4919         if (tb[RTA_SRC]) {
4920                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4921                         goto errout;
4922
4923                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4924         }
4925
4926         if (tb[RTA_DST]) {
4927                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4928                         goto errout;
4929
4930                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4931         }
4932
4933         if (tb[RTA_IIF])
4934                 iif = nla_get_u32(tb[RTA_IIF]);
4935
4936         if (tb[RTA_OIF])
4937                 oif = nla_get_u32(tb[RTA_OIF]);
4938
4939         if (tb[RTA_MARK])
4940                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4941
4942         if (tb[RTA_UID])
4943                 fl6.flowi6_uid = make_kuid(current_user_ns(),
4944                                            nla_get_u32(tb[RTA_UID]));
4945         else
4946                 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4947
4948         if (tb[RTA_SPORT])
4949                 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4950
4951         if (tb[RTA_DPORT])
4952                 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4953
4954         if (tb[RTA_IP_PROTO]) {
4955                 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
4956                                                   &fl6.flowi6_proto, AF_INET6,
4957                                                   extack);
4958                 if (err)
4959                         goto errout;
4960         }
4961
4962         if (iif) {
4963                 struct net_device *dev;
4964                 int flags = 0;
4965
4966                 rcu_read_lock();
4967
4968                 dev = dev_get_by_index_rcu(net, iif);
4969                 if (!dev) {
4970                         rcu_read_unlock();
4971                         err = -ENODEV;
4972                         goto errout;
4973                 }
4974
4975                 fl6.flowi6_iif = iif;
4976
4977                 if (!ipv6_addr_any(&fl6.saddr))
4978                         flags |= RT6_LOOKUP_F_HAS_SADDR;
4979
4980                 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
4981
4982                 rcu_read_unlock();
4983         } else {
4984                 fl6.flowi6_oif = oif;
4985
4986                 dst = ip6_route_output(net, NULL, &fl6);
4987         }
4988
4989
4990         rt = container_of(dst, struct rt6_info, dst);
4991         if (rt->dst.error) {
4992                 err = rt->dst.error;
4993                 ip6_rt_put(rt);
4994                 goto errout;
4995         }
4996
4997         if (rt == net->ipv6.ip6_null_entry) {
4998                 err = rt->dst.error;
4999                 ip6_rt_put(rt);
5000                 goto errout;
5001         }
5002
5003         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
5004         if (!skb) {
5005                 ip6_rt_put(rt);
5006                 err = -ENOBUFS;
5007                 goto errout;
5008         }
5009
5010         skb_dst_set(skb, &rt->dst);
5011
5012         rcu_read_lock();
5013         from = rcu_dereference(rt->from);
5014
5015         if (fibmatch)
5016                 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
5017                                     RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
5018                                     nlh->nlmsg_seq, 0);
5019         else
5020                 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5021                                     &fl6.saddr, iif, RTM_NEWROUTE,
5022                                     NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
5023                                     0);
5024         rcu_read_unlock();
5025
5026         if (err < 0) {
5027                 kfree_skb(skb);
5028                 goto errout;
5029         }
5030
5031         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
5032 errout:
5033         return err;
5034 }
5035
5036 void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
5037                      unsigned int nlm_flags)
5038 {
5039         struct sk_buff *skb;
5040         struct net *net = info->nl_net;
5041         u32 seq;
5042         int err;
5043
5044         err = -ENOBUFS;
5045         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
5046
5047         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
5048         if (!skb)
5049                 goto errout;
5050
5051         err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5052                             event, info->portid, seq, nlm_flags);
5053         if (err < 0) {
5054                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5055                 WARN_ON(err == -EMSGSIZE);
5056                 kfree_skb(skb);
5057                 goto errout;
5058         }
5059         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
5060                     info->nlh, gfp_any());
5061         return;
5062 errout:
5063         if (err < 0)
5064                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
5065 }
5066
5067 static int ip6_route_dev_notify(struct notifier_block *this,
5068                                 unsigned long event, void *ptr)
5069 {
5070         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
5071         struct net *net = dev_net(dev);
5072
5073         if (!(dev->flags & IFF_LOOPBACK))
5074                 return NOTIFY_OK;
5075
5076         if (event == NETDEV_REGISTER) {
5077                 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
5078                 net->ipv6.ip6_null_entry->dst.dev = dev;
5079                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5080 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5081                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
5082                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
5083                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
5084                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
5085 #endif
5086          } else if (event == NETDEV_UNREGISTER &&
5087                     dev->reg_state != NETREG_UNREGISTERED) {
5088                 /* NETDEV_UNREGISTER could be fired for multiple times by
5089                  * netdev_wait_allrefs(). Make sure we only call this once.
5090                  */
5091                 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
5092 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5093                 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5094                 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
5095 #endif
5096         }
5097
5098         return NOTIFY_OK;
5099 }
5100
5101 /*
5102  *      /proc
5103  */
5104
5105 #ifdef CONFIG_PROC_FS
5106 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5107 {
5108         struct net *net = (struct net *)seq->private;
5109         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
5110                    net->ipv6.rt6_stats->fib_nodes,
5111                    net->ipv6.rt6_stats->fib_route_nodes,
5112                    atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
5113                    net->ipv6.rt6_stats->fib_rt_entries,
5114                    net->ipv6.rt6_stats->fib_rt_cache,
5115                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
5116                    net->ipv6.rt6_stats->fib_discarded_routes);
5117
5118         return 0;
5119 }
5120 #endif  /* CONFIG_PROC_FS */
5121
5122 #ifdef CONFIG_SYSCTL
5123
5124 static
5125 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
5126                               void __user *buffer, size_t *lenp, loff_t *ppos)
5127 {
5128         struct net *net;
5129         int delay;
5130         int ret;
5131         if (!write)
5132                 return -EINVAL;
5133
5134         net = (struct net *)ctl->extra1;
5135         delay = net->ipv6.sysctl.flush_delay;
5136         ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5137         if (ret)
5138                 return ret;
5139
5140         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
5141         return 0;
5142 }
5143
5144 static int zero;
5145 static int one = 1;
5146
5147 static struct ctl_table ipv6_route_table_template[] = {
5148         {
5149                 .procname       =       "flush",
5150                 .data           =       &init_net.ipv6.sysctl.flush_delay,
5151                 .maxlen         =       sizeof(int),
5152                 .mode           =       0200,
5153                 .proc_handler   =       ipv6_sysctl_rtcache_flush
5154         },
5155         {
5156                 .procname       =       "gc_thresh",
5157                 .data           =       &ip6_dst_ops_template.gc_thresh,
5158                 .maxlen         =       sizeof(int),
5159                 .mode           =       0644,
5160                 .proc_handler   =       proc_dointvec,
5161         },
5162         {
5163                 .procname       =       "max_size",
5164                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
5165                 .maxlen         =       sizeof(int),
5166                 .mode           =       0644,
5167                 .proc_handler   =       proc_dointvec,
5168         },
5169         {
5170                 .procname       =       "gc_min_interval",
5171                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5172                 .maxlen         =       sizeof(int),
5173                 .mode           =       0644,
5174                 .proc_handler   =       proc_dointvec_jiffies,
5175         },
5176         {
5177                 .procname       =       "gc_timeout",
5178                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
5179                 .maxlen         =       sizeof(int),
5180                 .mode           =       0644,
5181                 .proc_handler   =       proc_dointvec_jiffies,
5182         },
5183         {
5184                 .procname       =       "gc_interval",
5185                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
5186                 .maxlen         =       sizeof(int),
5187                 .mode           =       0644,
5188                 .proc_handler   =       proc_dointvec_jiffies,
5189         },
5190         {
5191                 .procname       =       "gc_elasticity",
5192                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
5193                 .maxlen         =       sizeof(int),
5194                 .mode           =       0644,
5195                 .proc_handler   =       proc_dointvec,
5196         },
5197         {
5198                 .procname       =       "mtu_expires",
5199                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
5200                 .maxlen         =       sizeof(int),
5201                 .mode           =       0644,
5202                 .proc_handler   =       proc_dointvec_jiffies,
5203         },
5204         {
5205                 .procname       =       "min_adv_mss",
5206                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
5207                 .maxlen         =       sizeof(int),
5208                 .mode           =       0644,
5209                 .proc_handler   =       proc_dointvec,
5210         },
5211         {
5212                 .procname       =       "gc_min_interval_ms",
5213                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5214                 .maxlen         =       sizeof(int),
5215                 .mode           =       0644,
5216                 .proc_handler   =       proc_dointvec_ms_jiffies,
5217         },
5218         {
5219                 .procname       =       "skip_notify_on_dev_down",
5220                 .data           =       &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5221                 .maxlen         =       sizeof(int),
5222                 .mode           =       0644,
5223                 .proc_handler   =       proc_dointvec,
5224                 .extra1         =       &zero,
5225                 .extra2         =       &one,
5226         },
5227         { }
5228 };
5229
5230 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
5231 {
5232         struct ctl_table *table;
5233
5234         table = kmemdup(ipv6_route_table_template,
5235                         sizeof(ipv6_route_table_template),
5236                         GFP_KERNEL);
5237
5238         if (table) {
5239                 table[0].data = &net->ipv6.sysctl.flush_delay;
5240                 table[0].extra1 = net;
5241                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5242                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5243                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5244                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5245                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5246                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5247                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5248                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
5249                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5250                 table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
5251
5252                 /* Don't export sysctls to unprivileged users */
5253                 if (net->user_ns != &init_user_ns)
5254                         table[0].procname = NULL;
5255         }
5256
5257         return table;
5258 }
5259 #endif
5260
5261 static int __net_init ip6_route_net_init(struct net *net)
5262 {
5263         int ret = -ENOMEM;
5264
5265         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5266                sizeof(net->ipv6.ip6_dst_ops));
5267
5268         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5269                 goto out_ip6_dst_ops;
5270
5271         net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5272                                             sizeof(*net->ipv6.fib6_null_entry),
5273                                             GFP_KERNEL);
5274         if (!net->ipv6.fib6_null_entry)
5275                 goto out_ip6_dst_entries;
5276
5277         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5278                                            sizeof(*net->ipv6.ip6_null_entry),
5279                                            GFP_KERNEL);
5280         if (!net->ipv6.ip6_null_entry)
5281                 goto out_fib6_null_entry;
5282         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5283         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5284                          ip6_template_metrics, true);
5285
5286 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5287         net->ipv6.fib6_has_custom_rules = false;
5288         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5289                                                sizeof(*net->ipv6.ip6_prohibit_entry),
5290                                                GFP_KERNEL);
5291         if (!net->ipv6.ip6_prohibit_entry)
5292                 goto out_ip6_null_entry;
5293         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5294         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5295                          ip6_template_metrics, true);
5296
5297         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5298                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
5299                                                GFP_KERNEL);
5300         if (!net->ipv6.ip6_blk_hole_entry)
5301                 goto out_ip6_prohibit_entry;
5302         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5303         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5304                          ip6_template_metrics, true);
5305 #endif
5306
5307         net->ipv6.sysctl.flush_delay = 0;
5308         net->ipv6.sysctl.ip6_rt_max_size = 4096;
5309         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5310         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5311         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5312         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5313         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5314         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5315         net->ipv6.sysctl.skip_notify_on_dev_down = 0;
5316
5317         net->ipv6.ip6_rt_gc_expire = 30*HZ;
5318
5319         ret = 0;
5320 out:
5321         return ret;
5322
5323 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5324 out_ip6_prohibit_entry:
5325         kfree(net->ipv6.ip6_prohibit_entry);
5326 out_ip6_null_entry:
5327         kfree(net->ipv6.ip6_null_entry);
5328 #endif
5329 out_fib6_null_entry:
5330         kfree(net->ipv6.fib6_null_entry);
5331 out_ip6_dst_entries:
5332         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5333 out_ip6_dst_ops:
5334         goto out;
5335 }
5336
5337 static void __net_exit ip6_route_net_exit(struct net *net)
5338 {
5339         kfree(net->ipv6.fib6_null_entry);
5340         kfree(net->ipv6.ip6_null_entry);
5341 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5342         kfree(net->ipv6.ip6_prohibit_entry);
5343         kfree(net->ipv6.ip6_blk_hole_entry);
5344 #endif
5345         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5346 }
5347
5348 static int __net_init ip6_route_net_init_late(struct net *net)
5349 {
5350 #ifdef CONFIG_PROC_FS
5351         proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5352                         sizeof(struct ipv6_route_iter));
5353         proc_create_net_single("rt6_stats", 0444, net->proc_net,
5354                         rt6_stats_seq_show, NULL);
5355 #endif
5356         return 0;
5357 }
5358
5359 static void __net_exit ip6_route_net_exit_late(struct net *net)
5360 {
5361 #ifdef CONFIG_PROC_FS
5362         remove_proc_entry("ipv6_route", net->proc_net);
5363         remove_proc_entry("rt6_stats", net->proc_net);
5364 #endif
5365 }
5366
5367 static struct pernet_operations ip6_route_net_ops = {
5368         .init = ip6_route_net_init,
5369         .exit = ip6_route_net_exit,
5370 };
5371
5372 static int __net_init ipv6_inetpeer_init(struct net *net)
5373 {
5374         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5375
5376         if (!bp)
5377                 return -ENOMEM;
5378         inet_peer_base_init(bp);
5379         net->ipv6.peers = bp;
5380         return 0;
5381 }
5382
5383 static void __net_exit ipv6_inetpeer_exit(struct net *net)
5384 {
5385         struct inet_peer_base *bp = net->ipv6.peers;
5386
5387         net->ipv6.peers = NULL;
5388         inetpeer_invalidate_tree(bp);
5389         kfree(bp);
5390 }
5391
5392 static struct pernet_operations ipv6_inetpeer_ops = {
5393         .init   =       ipv6_inetpeer_init,
5394         .exit   =       ipv6_inetpeer_exit,
5395 };
5396
5397 static struct pernet_operations ip6_route_net_late_ops = {
5398         .init = ip6_route_net_init_late,
5399         .exit = ip6_route_net_exit_late,
5400 };
5401
5402 static struct notifier_block ip6_route_dev_notifier = {
5403         .notifier_call = ip6_route_dev_notify,
5404         .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
5405 };
5406
5407 void __init ip6_route_init_special_entries(void)
5408 {
5409         /* Registering of the loopback is done before this portion of code,
5410          * the loopback reference in rt6_info will not be taken, do it
5411          * manually for init_net */
5412         init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
5413         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5414         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5415   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5416         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5417         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5418         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5419         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5420   #endif
5421 }
5422
5423 int __init ip6_route_init(void)
5424 {
5425         int ret;
5426         int cpu;
5427
5428         ret = -ENOMEM;
5429         ip6_dst_ops_template.kmem_cachep =
5430                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
5431                                   SLAB_HWCACHE_ALIGN, NULL);
5432         if (!ip6_dst_ops_template.kmem_cachep)
5433                 goto out;
5434
5435         ret = dst_entries_init(&ip6_dst_blackhole_ops);
5436         if (ret)
5437                 goto out_kmem_cache;
5438
5439         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5440         if (ret)
5441                 goto out_dst_entries;
5442
5443         ret = register_pernet_subsys(&ip6_route_net_ops);
5444         if (ret)
5445                 goto out_register_inetpeer;
5446
5447         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5448
5449         ret = fib6_init();
5450         if (ret)
5451                 goto out_register_subsys;
5452
5453         ret = xfrm6_init();
5454         if (ret)
5455                 goto out_fib6_init;
5456
5457         ret = fib6_rules_init();
5458         if (ret)
5459                 goto xfrm6_init;
5460
5461         ret = register_pernet_subsys(&ip6_route_net_late_ops);
5462         if (ret)
5463                 goto fib6_rules_init;
5464
5465         ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5466                                    inet6_rtm_newroute, NULL, 0);
5467         if (ret < 0)
5468                 goto out_register_late_subsys;
5469
5470         ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5471                                    inet6_rtm_delroute, NULL, 0);
5472         if (ret < 0)
5473                 goto out_register_late_subsys;
5474
5475         ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5476                                    inet6_rtm_getroute, NULL,
5477                                    RTNL_FLAG_DOIT_UNLOCKED);
5478         if (ret < 0)
5479                 goto out_register_late_subsys;
5480
5481         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
5482         if (ret)
5483                 goto out_register_late_subsys;
5484
5485         for_each_possible_cpu(cpu) {
5486                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5487
5488                 INIT_LIST_HEAD(&ul->head);
5489                 spin_lock_init(&ul->lock);
5490         }
5491
5492 out:
5493         return ret;
5494
5495 out_register_late_subsys:
5496         rtnl_unregister_all(PF_INET6);
5497         unregister_pernet_subsys(&ip6_route_net_late_ops);
5498 fib6_rules_init:
5499         fib6_rules_cleanup();
5500 xfrm6_init:
5501         xfrm6_fini();
5502 out_fib6_init:
5503         fib6_gc_cleanup();
5504 out_register_subsys:
5505         unregister_pernet_subsys(&ip6_route_net_ops);
5506 out_register_inetpeer:
5507         unregister_pernet_subsys(&ipv6_inetpeer_ops);
5508 out_dst_entries:
5509         dst_entries_destroy(&ip6_dst_blackhole_ops);
5510 out_kmem_cache:
5511         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5512         goto out;
5513 }
5514
5515 void ip6_route_cleanup(void)
5516 {
5517         unregister_netdevice_notifier(&ip6_route_dev_notifier);
5518         unregister_pernet_subsys(&ip6_route_net_late_ops);
5519         fib6_rules_cleanup();
5520         xfrm6_fini();
5521         fib6_gc_cleanup();
5522         unregister_pernet_subsys(&ipv6_inetpeer_ops);
5523         unregister_pernet_subsys(&ip6_route_net_ops);
5524         dst_entries_destroy(&ip6_dst_blackhole_ops);
5525         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5526 }