OSDN Git Service

ipv4: Add helpers for neigh lookup for nexthop
authorDavid Ahern <dsahern@gmail.com>
Fri, 5 Apr 2019 23:30:34 +0000 (16:30 -0700)
committerDavid S. Miller <davem@davemloft.net>
Mon, 8 Apr 2019 22:22:41 +0000 (15:22 -0700)
A common theme in the output path is looking up a neigh entry for a
nexthop, either the gateway in an rtable or a fallback to the daddr
in the skb:

        nexthop = (__force u32)rt_nexthop(rt, ip_hdr(skb)->daddr);
        neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
        if (unlikely(!neigh))
                neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);

To allow the nexthop to be an IPv6 address we need to consider the
family of the nexthop and then call __ipv{4,6}_neigh_lookup_noref based
on it.

To make this simpler, add an ip_neigh_gw4 helper similar to ip_neigh_gw6
added in an earlier patch which handles:

        neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
        if (unlikely(!neigh))
                neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);

And then add a second one, ip_neigh_for_gw, that calls either
ip_neigh_gw4 or ip_neigh_gw6 based on the address family of the gateway.

Update the output paths in the VRF driver and core v4 code to use
ip_neigh_for_gw simplifying the family based lookup and making both
ready for a v6 nexthop.

ipv4_neigh_lookup has a different need - the potential to resolve a
passed in address in addition to any gateway in the rtable or skb. Since
this is a one-off, add ip_neigh_gw4 and ip_neigh_gw6 directly. The
difference between __neigh_create used by the helpers and neigh_create
called by ipv4_neigh_lookup is taking a refcount, so add rcu_read_lock_bh
and bump the refcnt on the neigh entry.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/vrf.c
include/net/route.h
net/ipv4/ip_output.c
net/ipv4/route.c

index fd13377..18d752a 100644 (file)
@@ -549,7 +549,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
        struct net_device *dev = dst->dev;
        unsigned int hh_len = LL_RESERVED_SPACE(dev);
        struct neighbour *neigh;
-       u32 nexthop;
+       bool is_v6gw = false;
        int ret = -EINVAL;
 
        nf_reset(skb);
@@ -572,13 +572,11 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
 
        rcu_read_lock_bh();
 
-       nexthop = (__force u32)rt_nexthop(rt, ip_hdr(skb)->daddr);
-       neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
-       if (unlikely(!neigh))
-               neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
+       neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
-               ret = neigh_output(neigh, skb, false);
+               /* if crossing protocols, can not use the cached header */
+               ret = neigh_output(neigh, skb, is_v6gw);
                rcu_read_unlock_bh();
                return ret;
        }
index 5d28a25..96f6c9a 100644 (file)
@@ -29,6 +29,8 @@
 #include <net/flow.h>
 #include <net/inet_sock.h>
 #include <net/ip_fib.h>
+#include <net/arp.h>
+#include <net/ndisc.h>
 #include <linux/in_route.h>
 #include <linux/rtnetlink.h>
 #include <linux/rcupdate.h>
@@ -350,4 +352,34 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
        return hoplimit;
 }
 
+/* Return the IPv4 neighbour entry for @daddr on @dev.
+ *
+ * An existing entry is looked up first; only when that misses is a new
+ * entry created in arp_tbl. The callers in this patch run under
+ * rcu_read_lock_bh() and check the result with IS_ERR().
+ */
+static inline struct neighbour *ip_neigh_gw4(struct net_device *dev,
+                                            __be32 daddr)
+{
+       struct neighbour *n = __ipv4_neigh_lookup_noref(dev, daddr);
+
+       if (likely(n))
+               return n;
+
+       /* lookup missed: fall back to creating the entry */
+       return __neigh_create(&arp_tbl, &daddr, dev, false);
+}
+
+/* Pick the neighbour entry to use when sending @skb along @rt.
+ *
+ * The gateway family stored in the route decides the lookup:
+ *   AF_INET  - IPv4 gateway rt->rt_gw4
+ *   AF_INET6 - IPv6 gateway rt->rt_gw6; *@is_v6gw is set to true
+ *   other    - no gateway, use the destination address of @skb
+ *
+ * *@is_v6gw is only ever written on the IPv6 path, so callers must
+ * initialise it to false beforehand.
+ */
+static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt,
+                                               struct sk_buff *skb,
+                                               bool *is_v6gw)
+{
+       struct net_device *dev = rt->dst.dev;
+
+       if (likely(rt->rt_gw_family == AF_INET))
+               return ip_neigh_gw4(dev, rt->rt_gw4);
+
+       if (rt->rt_gw_family == AF_INET6) {
+               struct neighbour *n = ip_neigh_gw6(dev, &rt->rt_gw6);
+
+               *is_v6gw = true;
+               return n;
+       }
+
+       /* no gateway on the route: resolve the packet's own destination */
+       return ip_neigh_gw4(dev, ip_hdr(skb)->daddr);
+}
+
 #endif /* _ROUTE_H */
index cca4892..4e42c19 100644 (file)
@@ -188,7 +188,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
        struct net_device *dev = dst->dev;
        unsigned int hh_len = LL_RESERVED_SPACE(dev);
        struct neighbour *neigh;
-       u32 nexthop;
+       bool is_v6gw = false;
 
        if (rt->rt_type == RTN_MULTICAST) {
                IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
@@ -218,16 +218,13 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
        }
 
        rcu_read_lock_bh();
-       nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
-       neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
-       if (unlikely(!neigh))
-               neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
+       neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
        if (!IS_ERR(neigh)) {
                int res;
 
                sock_confirm_neigh(skb, neigh);
-               res = neigh_output(neigh, skb, false);
-
+               /* if crossing protocols, can not use the cached header */
+               res = neigh_output(neigh, skb, is_v6gw);
                rcu_read_unlock_bh();
                return res;
        }
index 6e58acf..32ecb4c 100644 (file)
@@ -436,18 +436,27 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 {
        const struct rtable *rt = container_of(dst, struct rtable, dst);
        struct net_device *dev = dst->dev;
-       const __be32 *pkey = daddr;
        struct neighbour *n;
 
-       if (rt->rt_gw_family == AF_INET)
-               pkey = (const __be32 *) &rt->rt_gw4;
-       else if (skb)
-               pkey = &ip_hdr(skb)->daddr;
-
-       n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
-       if (n)
-               return n;
-       return neigh_create(&arp_tbl, pkey, dev);
+       rcu_read_lock_bh();
+
+       if (likely(rt->rt_gw_family == AF_INET)) {
+               n = ip_neigh_gw4(dev, rt->rt_gw4);
+       } else if (rt->rt_gw_family == AF_INET6) {
+               n = ip_neigh_gw6(dev, &rt->rt_gw6);
+        } else {
+               __be32 pkey;
+
+               pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
+               n = ip_neigh_gw4(dev, pkey);
+       }
+
+       if (n && !refcount_inc_not_zero(&n->refcnt))
+               n = NULL;
+
+       rcu_read_unlock_bh();
+
+       return n;
 }
 
 static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)