OSDN Git Service

4dc4574897705d526c3115cd94272d046e3e51e6
[tomoyo/tomoyo-test1.git] / drivers / net / geneve.c
1 /*
2  * GENEVE: Generic Network Virtualization Encapsulation
3  *
4  * Copyright (c) 2015 Red Hat, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13 #include <linux/kernel.h>
14 #include <linux/module.h>
15 #include <linux/etherdevice.h>
16 #include <linux/hash.h>
17 #include <net/dst_metadata.h>
18 #include <net/gro_cells.h>
19 #include <net/rtnetlink.h>
20 #include <net/geneve.h>
21 #include <net/protocol.h>
22
23 #define GENEVE_NETDEV_VER       "0.6"
24
25 #define GENEVE_UDP_PORT         6081
26
27 #define GENEVE_N_VID            (1u << 24)
28 #define GENEVE_VID_MASK         (GENEVE_N_VID - 1)
29
30 #define VNI_HASH_BITS           10
31 #define VNI_HASH_SIZE           (1<<VNI_HASH_BITS)
32
33 static bool log_ecn_error = true;
34 module_param(log_ecn_error, bool, 0644);
35 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
36
37 #define GENEVE_VER 0
38 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
39 #define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
40 #define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)
41
42 /* per-network namespace private data for this module */
43 struct geneve_net {
44         struct list_head        geneve_list;
45         struct list_head        sock_list;
46 };
47
48 static unsigned int geneve_net_id;
49
50 struct geneve_dev_node {
51         struct hlist_node hlist;
52         struct geneve_dev *geneve;
53 };
54
55 /* Pseudo network device */
56 struct geneve_dev {
57         struct geneve_dev_node hlist4;  /* vni hash table for IPv4 socket */
58 #if IS_ENABLED(CONFIG_IPV6)
59         struct geneve_dev_node hlist6;  /* vni hash table for IPv6 socket */
60 #endif
61         struct net         *net;        /* netns for packet i/o */
62         struct net_device  *dev;        /* netdev for geneve tunnel */
63         struct ip_tunnel_info info;
64         struct geneve_sock __rcu *sock4;        /* IPv4 socket used for geneve tunnel */
65 #if IS_ENABLED(CONFIG_IPV6)
66         struct geneve_sock __rcu *sock6;        /* IPv6 socket used for geneve tunnel */
67 #endif
68         struct list_head   next;        /* geneve's per namespace list */
69         struct gro_cells   gro_cells;
70         bool               collect_md;
71         bool               use_udp6_rx_checksums;
72         bool               ttl_inherit;
73 };
74
75 struct geneve_sock {
76         bool                    collect_md;
77         struct list_head        list;
78         struct socket           *sock;
79         struct rcu_head         rcu;
80         int                     refcnt;
81         struct hlist_head       vni_list[VNI_HASH_SIZE];
82 };
83
84 static inline __u32 geneve_net_vni_hash(u8 vni[3])
85 {
86         __u32 vnid;
87
88         vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
89         return hash_32(vnid, VNI_HASH_BITS);
90 }
91
92 static __be64 vni_to_tunnel_id(const __u8 *vni)
93 {
94 #ifdef __BIG_ENDIAN
95         return (vni[0] << 16) | (vni[1] << 8) | vni[2];
96 #else
97         return (__force __be64)(((__force u64)vni[0] << 40) |
98                                 ((__force u64)vni[1] << 48) |
99                                 ((__force u64)vni[2] << 56));
100 #endif
101 }
102
103 /* Convert 64 bit tunnel ID to 24 bit VNI. */
104 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
105 {
106 #ifdef __BIG_ENDIAN
107         vni[0] = (__force __u8)(tun_id >> 16);
108         vni[1] = (__force __u8)(tun_id >> 8);
109         vni[2] = (__force __u8)tun_id;
110 #else
111         vni[0] = (__force __u8)((__force u64)tun_id >> 40);
112         vni[1] = (__force __u8)((__force u64)tun_id >> 48);
113         vni[2] = (__force __u8)((__force u64)tun_id >> 56);
114 #endif
115 }
116
117 static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni)
118 {
119         return !memcmp(vni, &tun_id[5], 3);
120 }
121
122 static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
123 {
124         return gs->sock->sk->sk_family;
125 }
126
127 static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
128                                         __be32 addr, u8 vni[])
129 {
130         struct hlist_head *vni_list_head;
131         struct geneve_dev_node *node;
132         __u32 hash;
133
134         /* Find the device for this VNI */
135         hash = geneve_net_vni_hash(vni);
136         vni_list_head = &gs->vni_list[hash];
137         hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
138                 if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) &&
139                     addr == node->geneve->info.key.u.ipv4.dst)
140                         return node->geneve;
141         }
142         return NULL;
143 }
144
145 #if IS_ENABLED(CONFIG_IPV6)
146 static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
147                                          struct in6_addr addr6, u8 vni[])
148 {
149         struct hlist_head *vni_list_head;
150         struct geneve_dev_node *node;
151         __u32 hash;
152
153         /* Find the device for this VNI */
154         hash = geneve_net_vni_hash(vni);
155         vni_list_head = &gs->vni_list[hash];
156         hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
157                 if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) &&
158                     ipv6_addr_equal(&addr6, &node->geneve->info.key.u.ipv6.dst))
159                         return node->geneve;
160         }
161         return NULL;
162 }
163 #endif
164
165 static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
166 {
167         return (struct genevehdr *)(udp_hdr(skb) + 1);
168 }
169
170 static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
171                                             struct sk_buff *skb)
172 {
173         static u8 zero_vni[3];
174         u8 *vni;
175
176         if (geneve_get_sk_family(gs) == AF_INET) {
177                 struct iphdr *iph;
178                 __be32 addr;
179
180                 iph = ip_hdr(skb); /* outer IP header... */
181
182                 if (gs->collect_md) {
183                         vni = zero_vni;
184                         addr = 0;
185                 } else {
186                         vni = geneve_hdr(skb)->vni;
187                         addr = iph->saddr;
188                 }
189
190                 return geneve_lookup(gs, addr, vni);
191 #if IS_ENABLED(CONFIG_IPV6)
192         } else if (geneve_get_sk_family(gs) == AF_INET6) {
193                 static struct in6_addr zero_addr6;
194                 struct ipv6hdr *ip6h;
195                 struct in6_addr addr6;
196
197                 ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
198
199                 if (gs->collect_md) {
200                         vni = zero_vni;
201                         addr6 = zero_addr6;
202                 } else {
203                         vni = geneve_hdr(skb)->vni;
204                         addr6 = ip6h->saddr;
205                 }
206
207                 return geneve6_lookup(gs, addr6, vni);
208 #endif
209         }
210         return NULL;
211 }
212
213 /* geneve receive/decap routine */
214 static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
215                       struct sk_buff *skb)
216 {
217         struct genevehdr *gnvh = geneve_hdr(skb);
218         struct metadata_dst *tun_dst = NULL;
219         struct pcpu_sw_netstats *stats;
220         unsigned int len;
221         int err = 0;
222         void *oiph;
223
224         if (ip_tunnel_collect_metadata() || gs->collect_md) {
225                 __be16 flags;
226
227                 flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
228                         (gnvh->oam ? TUNNEL_OAM : 0) |
229                         (gnvh->critical ? TUNNEL_CRIT_OPT : 0);
230
231                 tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
232                                          vni_to_tunnel_id(gnvh->vni),
233                                          gnvh->opt_len * 4);
234                 if (!tun_dst) {
235                         geneve->dev->stats.rx_dropped++;
236                         goto drop;
237                 }
238                 /* Update tunnel dst according to Geneve options. */
239                 ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
240                                         gnvh->options, gnvh->opt_len * 4,
241                                         TUNNEL_GENEVE_OPT);
242         } else {
243                 /* Drop packets w/ critical options,
244                  * since we don't support any...
245                  */
246                 if (gnvh->critical) {
247                         geneve->dev->stats.rx_frame_errors++;
248                         geneve->dev->stats.rx_errors++;
249                         goto drop;
250                 }
251         }
252
253         skb_reset_mac_header(skb);
254         skb->protocol = eth_type_trans(skb, geneve->dev);
255         skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
256
257         if (tun_dst)
258                 skb_dst_set(skb, &tun_dst->dst);
259
260         /* Ignore packet loops (and multicast echo) */
261         if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) {
262                 geneve->dev->stats.rx_errors++;
263                 goto drop;
264         }
265
266         oiph = skb_network_header(skb);
267         skb_reset_network_header(skb);
268
269         if (geneve_get_sk_family(gs) == AF_INET)
270                 err = IP_ECN_decapsulate(oiph, skb);
271 #if IS_ENABLED(CONFIG_IPV6)
272         else
273                 err = IP6_ECN_decapsulate(oiph, skb);
274 #endif
275
276         if (unlikely(err)) {
277                 if (log_ecn_error) {
278                         if (geneve_get_sk_family(gs) == AF_INET)
279                                 net_info_ratelimited("non-ECT from %pI4 "
280                                                      "with TOS=%#x\n",
281                                                      &((struct iphdr *)oiph)->saddr,
282                                                      ((struct iphdr *)oiph)->tos);
283 #if IS_ENABLED(CONFIG_IPV6)
284                         else
285                                 net_info_ratelimited("non-ECT from %pI6\n",
286                                                      &((struct ipv6hdr *)oiph)->saddr);
287 #endif
288                 }
289                 if (err > 1) {
290                         ++geneve->dev->stats.rx_frame_errors;
291                         ++geneve->dev->stats.rx_errors;
292                         goto drop;
293                 }
294         }
295
296         len = skb->len;
297         err = gro_cells_receive(&geneve->gro_cells, skb);
298         if (likely(err == NET_RX_SUCCESS)) {
299                 stats = this_cpu_ptr(geneve->dev->tstats);
300                 u64_stats_update_begin(&stats->syncp);
301                 stats->rx_packets++;
302                 stats->rx_bytes += len;
303                 u64_stats_update_end(&stats->syncp);
304         }
305         return;
306 drop:
307         /* Consume bad packet */
308         kfree_skb(skb);
309 }
310
311 /* Setup stats when device is created */
312 static int geneve_init(struct net_device *dev)
313 {
314         struct geneve_dev *geneve = netdev_priv(dev);
315         int err;
316
317         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
318         if (!dev->tstats)
319                 return -ENOMEM;
320
321         err = gro_cells_init(&geneve->gro_cells, dev);
322         if (err) {
323                 free_percpu(dev->tstats);
324                 return err;
325         }
326
327         err = dst_cache_init(&geneve->info.dst_cache, GFP_KERNEL);
328         if (err) {
329                 free_percpu(dev->tstats);
330                 gro_cells_destroy(&geneve->gro_cells);
331                 return err;
332         }
333         return 0;
334 }
335
336 static void geneve_uninit(struct net_device *dev)
337 {
338         struct geneve_dev *geneve = netdev_priv(dev);
339
340         dst_cache_destroy(&geneve->info.dst_cache);
341         gro_cells_destroy(&geneve->gro_cells);
342         free_percpu(dev->tstats);
343 }
344
345 /* Callback from net/ipv4/udp.c to receive packets */
346 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
347 {
348         struct genevehdr *geneveh;
349         struct geneve_dev *geneve;
350         struct geneve_sock *gs;
351         int opts_len;
352
353         /* Need UDP and Geneve header to be present */
354         if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
355                 goto drop;
356
357         /* Return packets with reserved bits set */
358         geneveh = geneve_hdr(skb);
359         if (unlikely(geneveh->ver != GENEVE_VER))
360                 goto drop;
361
362         if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
363                 goto drop;
364
365         gs = rcu_dereference_sk_user_data(sk);
366         if (!gs)
367                 goto drop;
368
369         geneve = geneve_lookup_skb(gs, skb);
370         if (!geneve)
371                 goto drop;
372
373         opts_len = geneveh->opt_len * 4;
374         if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
375                                  htons(ETH_P_TEB),
376                                  !net_eq(geneve->net, dev_net(geneve->dev)))) {
377                 geneve->dev->stats.rx_dropped++;
378                 goto drop;
379         }
380
381         geneve_rx(geneve, gs, skb);
382         return 0;
383
384 drop:
385         /* Consume bad packet */
386         kfree_skb(skb);
387         return 0;
388 }
389
390 /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
391 static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
392 {
393         struct genevehdr *geneveh;
394         struct geneve_sock *gs;
395         u8 zero_vni[3] = { 0 };
396         u8 *vni = zero_vni;
397
398         if (skb->len < GENEVE_BASE_HLEN)
399                 return -EINVAL;
400
401         geneveh = geneve_hdr(skb);
402         if (geneveh->ver != GENEVE_VER)
403                 return -EINVAL;
404
405         if (geneveh->proto_type != htons(ETH_P_TEB))
406                 return -EINVAL;
407
408         gs = rcu_dereference_sk_user_data(sk);
409         if (!gs)
410                 return -ENOENT;
411
412         if (geneve_get_sk_family(gs) == AF_INET) {
413                 struct iphdr *iph = ip_hdr(skb);
414                 __be32 addr4 = 0;
415
416                 if (!gs->collect_md) {
417                         vni = geneve_hdr(skb)->vni;
418                         addr4 = iph->daddr;
419                 }
420
421                 return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT;
422         }
423
424 #if IS_ENABLED(CONFIG_IPV6)
425         if (geneve_get_sk_family(gs) == AF_INET6) {
426                 struct ipv6hdr *ip6h = ipv6_hdr(skb);
427                 struct in6_addr addr6 = { 0 };
428
429                 if (!gs->collect_md) {
430                         vni = geneve_hdr(skb)->vni;
431                         addr6 = ip6h->daddr;
432                 }
433
434                 return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT;
435         }
436 #endif
437
438         return -EPFNOSUPPORT;
439 }
440
441 static struct socket *geneve_create_sock(struct net *net, bool ipv6,
442                                          __be16 port, bool ipv6_rx_csum)
443 {
444         struct socket *sock;
445         struct udp_port_cfg udp_conf;
446         int err;
447
448         memset(&udp_conf, 0, sizeof(udp_conf));
449
450         if (ipv6) {
451                 udp_conf.family = AF_INET6;
452                 udp_conf.ipv6_v6only = 1;
453                 udp_conf.use_udp6_rx_checksums = ipv6_rx_csum;
454         } else {
455                 udp_conf.family = AF_INET;
456                 udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
457         }
458
459         udp_conf.local_udp_port = port;
460
461         /* Open UDP socket */
462         err = udp_sock_create(net, &udp_conf, &sock);
463         if (err < 0)
464                 return ERR_PTR(err);
465
466         return sock;
467 }
468
469 static int geneve_hlen(struct genevehdr *gh)
470 {
471         return sizeof(*gh) + gh->opt_len * 4;
472 }
473
474 static struct sk_buff *geneve_gro_receive(struct sock *sk,
475                                           struct list_head *head,
476                                           struct sk_buff *skb)
477 {
478         struct sk_buff *pp = NULL;
479         struct sk_buff *p;
480         struct genevehdr *gh, *gh2;
481         unsigned int hlen, gh_len, off_gnv;
482         const struct packet_offload *ptype;
483         __be16 type;
484         int flush = 1;
485
486         off_gnv = skb_gro_offset(skb);
487         hlen = off_gnv + sizeof(*gh);
488         gh = skb_gro_header_fast(skb, off_gnv);
489         if (skb_gro_header_hard(skb, hlen)) {
490                 gh = skb_gro_header_slow(skb, hlen, off_gnv);
491                 if (unlikely(!gh))
492                         goto out;
493         }
494
495         if (gh->ver != GENEVE_VER || gh->oam)
496                 goto out;
497         gh_len = geneve_hlen(gh);
498
499         hlen = off_gnv + gh_len;
500         if (skb_gro_header_hard(skb, hlen)) {
501                 gh = skb_gro_header_slow(skb, hlen, off_gnv);
502                 if (unlikely(!gh))
503                         goto out;
504         }
505
506         list_for_each_entry(p, head, list) {
507                 if (!NAPI_GRO_CB(p)->same_flow)
508                         continue;
509
510                 gh2 = (struct genevehdr *)(p->data + off_gnv);
511                 if (gh->opt_len != gh2->opt_len ||
512                     memcmp(gh, gh2, gh_len)) {
513                         NAPI_GRO_CB(p)->same_flow = 0;
514                         continue;
515                 }
516         }
517
518         type = gh->proto_type;
519
520         rcu_read_lock();
521         ptype = gro_find_receive_by_type(type);
522         if (!ptype)
523                 goto out_unlock;
524
525         skb_gro_pull(skb, gh_len);
526         skb_gro_postpull_rcsum(skb, gh, gh_len);
527         pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
528         flush = 0;
529
530 out_unlock:
531         rcu_read_unlock();
532 out:
533         skb_gro_flush_final(skb, pp, flush);
534
535         return pp;
536 }
537
538 static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
539                                int nhoff)
540 {
541         struct genevehdr *gh;
542         struct packet_offload *ptype;
543         __be16 type;
544         int gh_len;
545         int err = -ENOSYS;
546
547         gh = (struct genevehdr *)(skb->data + nhoff);
548         gh_len = geneve_hlen(gh);
549         type = gh->proto_type;
550
551         rcu_read_lock();
552         ptype = gro_find_complete_by_type(type);
553         if (ptype)
554                 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
555
556         rcu_read_unlock();
557
558         skb_set_inner_mac_header(skb, nhoff + gh_len);
559
560         return err;
561 }
562
563 /* Create new listen socket if needed */
564 static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
565                                                 bool ipv6, bool ipv6_rx_csum)
566 {
567         struct geneve_net *gn = net_generic(net, geneve_net_id);
568         struct geneve_sock *gs;
569         struct socket *sock;
570         struct udp_tunnel_sock_cfg tunnel_cfg;
571         int h;
572
573         gs = kzalloc(sizeof(*gs), GFP_KERNEL);
574         if (!gs)
575                 return ERR_PTR(-ENOMEM);
576
577         sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum);
578         if (IS_ERR(sock)) {
579                 kfree(gs);
580                 return ERR_CAST(sock);
581         }
582
583         gs->sock = sock;
584         gs->refcnt = 1;
585         for (h = 0; h < VNI_HASH_SIZE; ++h)
586                 INIT_HLIST_HEAD(&gs->vni_list[h]);
587
588         /* Initialize the geneve udp offloads structure */
589         udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
590
591         /* Mark socket as an encapsulation socket */
592         memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
593         tunnel_cfg.sk_user_data = gs;
594         tunnel_cfg.encap_type = 1;
595         tunnel_cfg.gro_receive = geneve_gro_receive;
596         tunnel_cfg.gro_complete = geneve_gro_complete;
597         tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
598         tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup;
599         tunnel_cfg.encap_destroy = NULL;
600         setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
601         list_add(&gs->list, &gn->sock_list);
602         return gs;
603 }
604
605 static void __geneve_sock_release(struct geneve_sock *gs)
606 {
607         if (!gs || --gs->refcnt)
608                 return;
609
610         list_del(&gs->list);
611         udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
612         udp_tunnel_sock_release(gs->sock);
613         kfree_rcu(gs, rcu);
614 }
615
616 static void geneve_sock_release(struct geneve_dev *geneve)
617 {
618         struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4);
619 #if IS_ENABLED(CONFIG_IPV6)
620         struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6);
621
622         rcu_assign_pointer(geneve->sock6, NULL);
623 #endif
624
625         rcu_assign_pointer(geneve->sock4, NULL);
626         synchronize_net();
627
628         __geneve_sock_release(gs4);
629 #if IS_ENABLED(CONFIG_IPV6)
630         __geneve_sock_release(gs6);
631 #endif
632 }
633
634 static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
635                                             sa_family_t family,
636                                             __be16 dst_port)
637 {
638         struct geneve_sock *gs;
639
640         list_for_each_entry(gs, &gn->sock_list, list) {
641                 if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
642                     geneve_get_sk_family(gs) == family) {
643                         return gs;
644                 }
645         }
646         return NULL;
647 }
648
649 static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
650 {
651         struct net *net = geneve->net;
652         struct geneve_net *gn = net_generic(net, geneve_net_id);
653         struct geneve_dev_node *node;
654         struct geneve_sock *gs;
655         __u8 vni[3];
656         __u32 hash;
657
658         gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->info.key.tp_dst);
659         if (gs) {
660                 gs->refcnt++;
661                 goto out;
662         }
663
664         gs = geneve_socket_create(net, geneve->info.key.tp_dst, ipv6,
665                                   geneve->use_udp6_rx_checksums);
666         if (IS_ERR(gs))
667                 return PTR_ERR(gs);
668
669 out:
670         gs->collect_md = geneve->collect_md;
671 #if IS_ENABLED(CONFIG_IPV6)
672         if (ipv6) {
673                 rcu_assign_pointer(geneve->sock6, gs);
674                 node = &geneve->hlist6;
675         } else
676 #endif
677         {
678                 rcu_assign_pointer(geneve->sock4, gs);
679                 node = &geneve->hlist4;
680         }
681         node->geneve = geneve;
682
683         tunnel_id_to_vni(geneve->info.key.tun_id, vni);
684         hash = geneve_net_vni_hash(vni);
685         hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]);
686         return 0;
687 }
688
689 static int geneve_open(struct net_device *dev)
690 {
691         struct geneve_dev *geneve = netdev_priv(dev);
692         bool ipv6 = !!(geneve->info.mode & IP_TUNNEL_INFO_IPV6);
693         bool metadata = geneve->collect_md;
694         int ret = 0;
695
696 #if IS_ENABLED(CONFIG_IPV6)
697         if (ipv6 || metadata)
698                 ret = geneve_sock_add(geneve, true);
699 #endif
700         if (!ret && (!ipv6 || metadata))
701                 ret = geneve_sock_add(geneve, false);
702         if (ret < 0)
703                 geneve_sock_release(geneve);
704
705         return ret;
706 }
707
708 static int geneve_stop(struct net_device *dev)
709 {
710         struct geneve_dev *geneve = netdev_priv(dev);
711
712         hlist_del_init_rcu(&geneve->hlist4.hlist);
713 #if IS_ENABLED(CONFIG_IPV6)
714         hlist_del_init_rcu(&geneve->hlist6.hlist);
715 #endif
716         geneve_sock_release(geneve);
717         return 0;
718 }
719
720 static void geneve_build_header(struct genevehdr *geneveh,
721                                 const struct ip_tunnel_info *info)
722 {
723         geneveh->ver = GENEVE_VER;
724         geneveh->opt_len = info->options_len / 4;
725         geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM);
726         geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT);
727         geneveh->rsvd1 = 0;
728         tunnel_id_to_vni(info->key.tun_id, geneveh->vni);
729         geneveh->proto_type = htons(ETH_P_TEB);
730         geneveh->rsvd2 = 0;
731
732         if (info->key.tun_flags & TUNNEL_GENEVE_OPT)
733                 ip_tunnel_info_opts_get(geneveh->options, info);
734 }
735
736 static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
737                             const struct ip_tunnel_info *info,
738                             bool xnet, int ip_hdr_len)
739 {
740         bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
741         struct genevehdr *gnvh;
742         int min_headroom;
743         int err;
744
745         skb_reset_mac_header(skb);
746         skb_scrub_packet(skb, xnet);
747
748         min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len +
749                        GENEVE_BASE_HLEN + info->options_len + ip_hdr_len;
750         err = skb_cow_head(skb, min_headroom);
751         if (unlikely(err))
752                 goto free_dst;
753
754         err = udp_tunnel_handle_offloads(skb, udp_sum);
755         if (err)
756                 goto free_dst;
757
758         gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len);
759         geneve_build_header(gnvh, info);
760         skb_set_inner_protocol(skb, htons(ETH_P_TEB));
761         return 0;
762
763 free_dst:
764         dst_release(dst);
765         return err;
766 }
767
768 static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
769                                        struct net_device *dev,
770                                        struct geneve_sock *gs4,
771                                        struct flowi4 *fl4,
772                                        const struct ip_tunnel_info *info)
773 {
774         bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
775         struct geneve_dev *geneve = netdev_priv(dev);
776         struct dst_cache *dst_cache;
777         struct rtable *rt = NULL;
778         __u8 tos;
779
780         if (!gs4)
781                 return ERR_PTR(-EIO);
782
783         memset(fl4, 0, sizeof(*fl4));
784         fl4->flowi4_mark = skb->mark;
785         fl4->flowi4_proto = IPPROTO_UDP;
786         fl4->daddr = info->key.u.ipv4.dst;
787         fl4->saddr = info->key.u.ipv4.src;
788
789         tos = info->key.tos;
790         if ((tos == 1) && !geneve->collect_md) {
791                 tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
792                 use_cache = false;
793         }
794         fl4->flowi4_tos = RT_TOS(tos);
795
796         dst_cache = (struct dst_cache *)&info->dst_cache;
797         if (use_cache) {
798                 rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
799                 if (rt)
800                         return rt;
801         }
802         rt = ip_route_output_key(geneve->net, fl4);
803         if (IS_ERR(rt)) {
804                 netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
805                 return ERR_PTR(-ENETUNREACH);
806         }
807         if (rt->dst.dev == dev) { /* is this necessary? */
808                 netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
809                 ip_rt_put(rt);
810                 return ERR_PTR(-ELOOP);
811         }
812         if (use_cache)
813                 dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
814         return rt;
815 }
816
817 #if IS_ENABLED(CONFIG_IPV6)
818 static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
819                                            struct net_device *dev,
820                                            struct geneve_sock *gs6,
821                                            struct flowi6 *fl6,
822                                            const struct ip_tunnel_info *info)
823 {
824         bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
825         struct geneve_dev *geneve = netdev_priv(dev);
826         struct dst_entry *dst = NULL;
827         struct dst_cache *dst_cache;
828         __u8 prio;
829
830         if (!gs6)
831                 return ERR_PTR(-EIO);
832
833         memset(fl6, 0, sizeof(*fl6));
834         fl6->flowi6_mark = skb->mark;
835         fl6->flowi6_proto = IPPROTO_UDP;
836         fl6->daddr = info->key.u.ipv6.dst;
837         fl6->saddr = info->key.u.ipv6.src;
838         prio = info->key.tos;
839         if ((prio == 1) && !geneve->collect_md) {
840                 prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
841                 use_cache = false;
842         }
843
844         fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
845                                            info->key.label);
846         dst_cache = (struct dst_cache *)&info->dst_cache;
847         if (use_cache) {
848                 dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
849                 if (dst)
850                         return dst;
851         }
852         if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
853                 netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
854                 return ERR_PTR(-ENETUNREACH);
855         }
856         if (dst->dev == dev) { /* is this necessary? */
857                 netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr);
858                 dst_release(dst);
859                 return ERR_PTR(-ELOOP);
860         }
861
862         if (use_cache)
863                 dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
864         return dst;
865 }
866 #endif
867
868 static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
869                            struct geneve_dev *geneve,
870                            const struct ip_tunnel_info *info)
871 {
872         bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
873         struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
874         const struct ip_tunnel_key *key = &info->key;
875         struct rtable *rt;
876         struct flowi4 fl4;
877         __u8 tos, ttl;
878         __be16 sport;
879         __be16 df;
880         int err;
881
882         rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
883         if (IS_ERR(rt))
884                 return PTR_ERR(rt);
885
886         skb_tunnel_check_pmtu(skb, &rt->dst,
887                               GENEVE_IPV4_HLEN + info->options_len);
888
889         sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
890         if (geneve->collect_md) {
891                 tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
892                 ttl = key->ttl;
893         } else {
894                 tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
895                 if (geneve->ttl_inherit)
896                         ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
897                 else
898                         ttl = key->ttl;
899                 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
900         }
901         df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
902
903         err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr));
904         if (unlikely(err))
905                 return err;
906
907         udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
908                             tos, ttl, df, sport, geneve->info.key.tp_dst,
909                             !net_eq(geneve->net, dev_net(geneve->dev)),
910                             !(info->key.tun_flags & TUNNEL_CSUM));
911         return 0;
912 }
913
914 #if IS_ENABLED(CONFIG_IPV6)
915 static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
916                             struct geneve_dev *geneve,
917                             const struct ip_tunnel_info *info)
918 {
919         bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
920         struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
921         const struct ip_tunnel_key *key = &info->key;
922         struct dst_entry *dst = NULL;
923         struct flowi6 fl6;
924         __u8 prio, ttl;
925         __be16 sport;
926         int err;
927
928         dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info);
929         if (IS_ERR(dst))
930                 return PTR_ERR(dst);
931
932         skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len);
933
934         sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
935         if (geneve->collect_md) {
936                 prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
937                 ttl = key->ttl;
938         } else {
939                 prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
940                                            ip_hdr(skb), skb);
941                 if (geneve->ttl_inherit)
942                         ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
943                 else
944                         ttl = key->ttl;
945                 ttl = ttl ? : ip6_dst_hoplimit(dst);
946         }
947         err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr));
948         if (unlikely(err))
949                 return err;
950
951         udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
952                              &fl6.saddr, &fl6.daddr, prio, ttl,
953                              info->key.label, sport, geneve->info.key.tp_dst,
954                              !(info->key.tun_flags & TUNNEL_CSUM));
955         return 0;
956 }
957 #endif
958
959 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
960 {
961         struct geneve_dev *geneve = netdev_priv(dev);
962         struct ip_tunnel_info *info = NULL;
963         int err;
964
965         if (geneve->collect_md) {
966                 info = skb_tunnel_info(skb);
967                 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
968                         err = -EINVAL;
969                         netdev_dbg(dev, "no tunnel metadata\n");
970                         goto tx_error;
971                 }
972         } else {
973                 info = &geneve->info;
974         }
975
976         rcu_read_lock();
977 #if IS_ENABLED(CONFIG_IPV6)
978         if (info->mode & IP_TUNNEL_INFO_IPV6)
979                 err = geneve6_xmit_skb(skb, dev, geneve, info);
980         else
981 #endif
982                 err = geneve_xmit_skb(skb, dev, geneve, info);
983         rcu_read_unlock();
984
985         if (likely(!err))
986                 return NETDEV_TX_OK;
987 tx_error:
988         dev_kfree_skb(skb);
989
990         if (err == -ELOOP)
991                 dev->stats.collisions++;
992         else if (err == -ENETUNREACH)
993                 dev->stats.tx_carrier_errors++;
994
995         dev->stats.tx_errors++;
996         return NETDEV_TX_OK;
997 }
998
999 static int geneve_change_mtu(struct net_device *dev, int new_mtu)
1000 {
1001         if (new_mtu > dev->max_mtu)
1002                 new_mtu = dev->max_mtu;
1003         else if (new_mtu < dev->min_mtu)
1004                 new_mtu = dev->min_mtu;
1005
1006         dev->mtu = new_mtu;
1007         return 0;
1008 }
1009
1010 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
1011 {
1012         struct ip_tunnel_info *info = skb_tunnel_info(skb);
1013         struct geneve_dev *geneve = netdev_priv(dev);
1014
1015         if (ip_tunnel_info_af(info) == AF_INET) {
1016                 struct rtable *rt;
1017                 struct flowi4 fl4;
1018                 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
1019
1020                 rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
1021                 if (IS_ERR(rt))
1022                         return PTR_ERR(rt);
1023
1024                 ip_rt_put(rt);
1025                 info->key.u.ipv4.src = fl4.saddr;
1026 #if IS_ENABLED(CONFIG_IPV6)
1027         } else if (ip_tunnel_info_af(info) == AF_INET6) {
1028                 struct dst_entry *dst;
1029                 struct flowi6 fl6;
1030                 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
1031
1032                 dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info);
1033                 if (IS_ERR(dst))
1034                         return PTR_ERR(dst);
1035
1036                 dst_release(dst);
1037                 info->key.u.ipv6.src = fl6.saddr;
1038 #endif
1039         } else {
1040                 return -EINVAL;
1041         }
1042
1043         info->key.tp_src = udp_flow_src_port(geneve->net, skb,
1044                                              1, USHRT_MAX, true);
1045         info->key.tp_dst = geneve->info.key.tp_dst;
1046         return 0;
1047 }
1048
1049 static const struct net_device_ops geneve_netdev_ops = {
1050         .ndo_init               = geneve_init,
1051         .ndo_uninit             = geneve_uninit,
1052         .ndo_open               = geneve_open,
1053         .ndo_stop               = geneve_stop,
1054         .ndo_start_xmit         = geneve_xmit,
1055         .ndo_get_stats64        = ip_tunnel_get_stats64,
1056         .ndo_change_mtu         = geneve_change_mtu,
1057         .ndo_validate_addr      = eth_validate_addr,
1058         .ndo_set_mac_address    = eth_mac_addr,
1059         .ndo_fill_metadata_dst  = geneve_fill_metadata_dst,
1060 };
1061
1062 static void geneve_get_drvinfo(struct net_device *dev,
1063                                struct ethtool_drvinfo *drvinfo)
1064 {
1065         strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
1066         strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
1067 }
1068
1069 static const struct ethtool_ops geneve_ethtool_ops = {
1070         .get_drvinfo    = geneve_get_drvinfo,
1071         .get_link       = ethtool_op_get_link,
1072 };
1073
1074 /* Info for udev, that this is a virtual tunnel endpoint */
1075 static struct device_type geneve_type = {
1076         .name = "geneve",
1077 };
1078
1079 /* Calls the ndo_udp_tunnel_add of the caller in order to
1080  * supply the listening GENEVE udp ports. Callers are expected
1081  * to implement the ndo_udp_tunnel_add.
1082  */
1083 static void geneve_offload_rx_ports(struct net_device *dev, bool push)
1084 {
1085         struct net *net = dev_net(dev);
1086         struct geneve_net *gn = net_generic(net, geneve_net_id);
1087         struct geneve_sock *gs;
1088
1089         rcu_read_lock();
1090         list_for_each_entry_rcu(gs, &gn->sock_list, list) {
1091                 if (push) {
1092                         udp_tunnel_push_rx_port(dev, gs->sock,
1093                                                 UDP_TUNNEL_TYPE_GENEVE);
1094                 } else {
1095                         udp_tunnel_drop_rx_port(dev, gs->sock,
1096                                                 UDP_TUNNEL_TYPE_GENEVE);
1097                 }
1098         }
1099         rcu_read_unlock();
1100 }
1101
1102 /* Initialize the device structure. */
1103 static void geneve_setup(struct net_device *dev)
1104 {
1105         ether_setup(dev);
1106
1107         dev->netdev_ops = &geneve_netdev_ops;
1108         dev->ethtool_ops = &geneve_ethtool_ops;
1109         dev->needs_free_netdev = true;
1110
1111         SET_NETDEV_DEVTYPE(dev, &geneve_type);
1112
1113         dev->features    |= NETIF_F_LLTX;
1114         dev->features    |= NETIF_F_SG | NETIF_F_HW_CSUM;
1115         dev->features    |= NETIF_F_RXCSUM;
1116         dev->features    |= NETIF_F_GSO_SOFTWARE;
1117
1118         dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
1119         dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1120
1121         /* MTU range: 68 - (something less than 65535) */
1122         dev->min_mtu = ETH_MIN_MTU;
1123         /* The max_mtu calculation does not take account of GENEVE
1124          * options, to avoid excluding potentially valid
1125          * configurations. This will be further reduced by IPvX hdr size.
1126          */
1127         dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
1128
1129         netif_keep_dst(dev);
1130         dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1131         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
1132         eth_hw_addr_random(dev);
1133 }
1134
1135 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
1136         [IFLA_GENEVE_ID]                = { .type = NLA_U32 },
1137         [IFLA_GENEVE_REMOTE]            = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1138         [IFLA_GENEVE_REMOTE6]           = { .len = sizeof(struct in6_addr) },
1139         [IFLA_GENEVE_TTL]               = { .type = NLA_U8 },
1140         [IFLA_GENEVE_TOS]               = { .type = NLA_U8 },
1141         [IFLA_GENEVE_LABEL]             = { .type = NLA_U32 },
1142         [IFLA_GENEVE_PORT]              = { .type = NLA_U16 },
1143         [IFLA_GENEVE_COLLECT_METADATA]  = { .type = NLA_FLAG },
1144         [IFLA_GENEVE_UDP_CSUM]          = { .type = NLA_U8 },
1145         [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
1146         [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
1147         [IFLA_GENEVE_TTL_INHERIT]       = { .type = NLA_U8 },
1148 };
1149
1150 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
1151                            struct netlink_ext_ack *extack)
1152 {
1153         if (tb[IFLA_ADDRESS]) {
1154                 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
1155                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
1156                                             "Provided link layer address is not Ethernet");
1157                         return -EINVAL;
1158                 }
1159
1160                 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
1161                         NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
1162                                             "Provided Ethernet address is not unicast");
1163                         return -EADDRNOTAVAIL;
1164                 }
1165         }
1166
1167         if (!data) {
1168                 NL_SET_ERR_MSG(extack,
1169                                "Not enough attributes provided to perform the operation");
1170                 return -EINVAL;
1171         }
1172
1173         if (data[IFLA_GENEVE_ID]) {
1174                 __u32 vni =  nla_get_u32(data[IFLA_GENEVE_ID]);
1175
1176                 if (vni >= GENEVE_N_VID) {
1177                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID],
1178                                             "Geneve ID must be lower than 16777216");
1179                         return -ERANGE;
1180                 }
1181         }
1182
1183         return 0;
1184 }
1185
1186 static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
1187                                           const struct ip_tunnel_info *info,
1188                                           bool *tun_on_same_port,
1189                                           bool *tun_collect_md)
1190 {
1191         struct geneve_dev *geneve, *t = NULL;
1192
1193         *tun_on_same_port = false;
1194         *tun_collect_md = false;
1195         list_for_each_entry(geneve, &gn->geneve_list, next) {
1196                 if (info->key.tp_dst == geneve->info.key.tp_dst) {
1197                         *tun_collect_md = geneve->collect_md;
1198                         *tun_on_same_port = true;
1199                 }
1200                 if (info->key.tun_id == geneve->info.key.tun_id &&
1201                     info->key.tp_dst == geneve->info.key.tp_dst &&
1202                     !memcmp(&info->key.u, &geneve->info.key.u, sizeof(info->key.u)))
1203                         t = geneve;
1204         }
1205         return t;
1206 }
1207
1208 static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
1209 {
1210         return !(info->key.tun_id || info->key.tun_flags || info->key.tos ||
1211                  info->key.ttl || info->key.label || info->key.tp_src ||
1212                  memchr_inv(&info->key.u, 0, sizeof(info->key.u)));
1213 }
1214
1215 static bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
1216                                   struct ip_tunnel_info *b)
1217 {
1218         if (ip_tunnel_info_af(a) == AF_INET)
1219                 return a->key.u.ipv4.dst == b->key.u.ipv4.dst;
1220         else
1221                 return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst);
1222 }
1223
1224 static int geneve_configure(struct net *net, struct net_device *dev,
1225                             struct netlink_ext_ack *extack,
1226                             const struct ip_tunnel_info *info,
1227                             bool metadata, bool ipv6_rx_csum,
1228                             bool ttl_inherit)
1229 {
1230         struct geneve_net *gn = net_generic(net, geneve_net_id);
1231         struct geneve_dev *t, *geneve = netdev_priv(dev);
1232         bool tun_collect_md, tun_on_same_port;
1233         int err, encap_len;
1234
1235         if (metadata && !is_tnl_info_zero(info)) {
1236                 NL_SET_ERR_MSG(extack,
1237                                "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified");
1238                 return -EINVAL;
1239         }
1240
1241         geneve->net = net;
1242         geneve->dev = dev;
1243
1244         t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md);
1245         if (t)
1246                 return -EBUSY;
1247
1248         /* make enough headroom for basic scenario */
1249         encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
1250         if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
1251                 encap_len += sizeof(struct iphdr);
1252                 dev->max_mtu -= sizeof(struct iphdr);
1253         } else {
1254                 encap_len += sizeof(struct ipv6hdr);
1255                 dev->max_mtu -= sizeof(struct ipv6hdr);
1256         }
1257         dev->needed_headroom = encap_len + ETH_HLEN;
1258
1259         if (metadata) {
1260                 if (tun_on_same_port) {
1261                         NL_SET_ERR_MSG(extack,
1262                                        "There can be only one externally controlled device on a destination port");
1263                         return -EPERM;
1264                 }
1265         } else {
1266                 if (tun_collect_md) {
1267                         NL_SET_ERR_MSG(extack,
1268                                        "There already exists an externally controlled device on this destination port");
1269                         return -EPERM;
1270                 }
1271         }
1272
1273         dst_cache_reset(&geneve->info.dst_cache);
1274         geneve->info = *info;
1275         geneve->collect_md = metadata;
1276         geneve->use_udp6_rx_checksums = ipv6_rx_csum;
1277         geneve->ttl_inherit = ttl_inherit;
1278
1279         err = register_netdevice(dev);
1280         if (err)
1281                 return err;
1282
1283         list_add(&geneve->next, &gn->geneve_list);
1284         return 0;
1285 }
1286
1287 static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
1288 {
1289         memset(info, 0, sizeof(*info));
1290         info->key.tp_dst = htons(dst_port);
1291 }
1292
1293 static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
1294                           struct netlink_ext_ack *extack,
1295                           struct ip_tunnel_info *info, bool *metadata,
1296                           bool *use_udp6_rx_checksums, bool *ttl_inherit,
1297                           bool changelink)
1298 {
1299         int attrtype;
1300
1301         if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) {
1302                 NL_SET_ERR_MSG(extack,
1303                                "Cannot specify both IPv4 and IPv6 Remote addresses");
1304                 return -EINVAL;
1305         }
1306
1307         if (data[IFLA_GENEVE_REMOTE]) {
1308                 if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) {
1309                         attrtype = IFLA_GENEVE_REMOTE;
1310                         goto change_notsup;
1311                 }
1312
1313                 info->key.u.ipv4.dst =
1314                         nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
1315
1316                 if (IN_MULTICAST(ntohl(info->key.u.ipv4.dst))) {
1317                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
1318                                             "Remote IPv4 address cannot be Multicast");
1319                         return -EINVAL;
1320                 }
1321         }
1322
1323         if (data[IFLA_GENEVE_REMOTE6]) {
1324 #if IS_ENABLED(CONFIG_IPV6)
1325                 if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
1326                         attrtype = IFLA_GENEVE_REMOTE6;
1327                         goto change_notsup;
1328                 }
1329
1330                 info->mode = IP_TUNNEL_INFO_IPV6;
1331                 info->key.u.ipv6.dst =
1332                         nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);
1333
1334                 if (ipv6_addr_type(&info->key.u.ipv6.dst) &
1335                     IPV6_ADDR_LINKLOCAL) {
1336                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1337                                             "Remote IPv6 address cannot be link-local");
1338                         return -EINVAL;
1339                 }
1340                 if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) {
1341                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1342                                             "Remote IPv6 address cannot be Multicast");
1343                         return -EINVAL;
1344                 }
1345                 info->key.tun_flags |= TUNNEL_CSUM;
1346                 *use_udp6_rx_checksums = true;
1347 #else
1348                 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1349                                     "IPv6 support not enabled in the kernel");
1350                 return -EPFNOSUPPORT;
1351 #endif
1352         }
1353
1354         if (data[IFLA_GENEVE_ID]) {
1355                 __u32 vni;
1356                 __u8 tvni[3];
1357                 __be64 tunid;
1358
1359                 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
1360                 tvni[0] = (vni & 0x00ff0000) >> 16;
1361                 tvni[1] = (vni & 0x0000ff00) >> 8;
1362                 tvni[2] =  vni & 0x000000ff;
1363
1364                 tunid = vni_to_tunnel_id(tvni);
1365                 if (changelink && (tunid != info->key.tun_id)) {
1366                         attrtype = IFLA_GENEVE_ID;
1367                         goto change_notsup;
1368                 }
1369                 info->key.tun_id = tunid;
1370         }
1371
1372         if (data[IFLA_GENEVE_TTL_INHERIT]) {
1373                 if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT]))
1374                         *ttl_inherit = true;
1375                 else
1376                         *ttl_inherit = false;
1377         } else if (data[IFLA_GENEVE_TTL]) {
1378                 info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
1379                 *ttl_inherit = false;
1380         }
1381
1382         if (data[IFLA_GENEVE_TOS])
1383                 info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
1384
1385         if (data[IFLA_GENEVE_LABEL]) {
1386                 info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
1387                                   IPV6_FLOWLABEL_MASK;
1388                 if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) {
1389                         NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL],
1390                                             "Label attribute only applies for IPv6 Geneve devices");
1391                         return -EINVAL;
1392                 }
1393         }
1394
1395         if (data[IFLA_GENEVE_PORT]) {
1396                 if (changelink) {
1397                         attrtype = IFLA_GENEVE_PORT;
1398                         goto change_notsup;
1399                 }
1400                 info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
1401         }
1402
1403         if (data[IFLA_GENEVE_COLLECT_METADATA]) {
1404                 if (changelink) {
1405                         attrtype = IFLA_GENEVE_COLLECT_METADATA;
1406                         goto change_notsup;
1407                 }
1408                 *metadata = true;
1409         }
1410
1411         if (data[IFLA_GENEVE_UDP_CSUM]) {
1412                 if (changelink) {
1413                         attrtype = IFLA_GENEVE_UDP_CSUM;
1414                         goto change_notsup;
1415                 }
1416                 if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
1417                         info->key.tun_flags |= TUNNEL_CSUM;
1418         }
1419
1420         if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
1421 #if IS_ENABLED(CONFIG_IPV6)
1422                 if (changelink) {
1423                         attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX;
1424                         goto change_notsup;
1425                 }
1426                 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
1427                         info->key.tun_flags &= ~TUNNEL_CSUM;
1428 #else
1429                 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX],
1430                                     "IPv6 support not enabled in the kernel");
1431                 return -EPFNOSUPPORT;
1432 #endif
1433         }
1434
1435         if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) {
1436 #if IS_ENABLED(CONFIG_IPV6)
1437                 if (changelink) {
1438                         attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX;
1439                         goto change_notsup;
1440                 }
1441                 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
1442                         *use_udp6_rx_checksums = false;
1443 #else
1444                 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX],
1445                                     "IPv6 support not enabled in the kernel");
1446                 return -EPFNOSUPPORT;
1447 #endif
1448         }
1449
1450         return 0;
1451 change_notsup:
1452         NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
1453                             "Changing VNI, Port, endpoint IP address family, external, and UDP checksum attributes are not supported");
1454         return -EOPNOTSUPP;
1455 }
1456
1457 static void geneve_link_config(struct net_device *dev,
1458                                struct ip_tunnel_info *info, struct nlattr *tb[])
1459 {
1460         struct geneve_dev *geneve = netdev_priv(dev);
1461         int ldev_mtu = 0;
1462
1463         if (tb[IFLA_MTU]) {
1464                 geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
1465                 return;
1466         }
1467
1468         switch (ip_tunnel_info_af(info)) {
1469         case AF_INET: {
1470                 struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
1471                 struct rtable *rt = ip_route_output_key(geneve->net, &fl4);
1472
1473                 if (!IS_ERR(rt) && rt->dst.dev) {
1474                         ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN;
1475                         ip_rt_put(rt);
1476                 }
1477                 break;
1478         }
1479 #if IS_ENABLED(CONFIG_IPV6)
1480         case AF_INET6: {
1481                 struct rt6_info *rt = rt6_lookup(geneve->net,
1482                                                  &info->key.u.ipv6.dst, NULL, 0,
1483                                                  NULL, 0);
1484
1485                 if (rt && rt->dst.dev)
1486                         ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
1487                 ip6_rt_put(rt);
1488                 break;
1489         }
1490 #endif
1491         }
1492
1493         if (ldev_mtu <= 0)
1494                 return;
1495
1496         geneve_change_mtu(dev, ldev_mtu - info->options_len);
1497 }
1498
1499 static int geneve_newlink(struct net *net, struct net_device *dev,
1500                           struct nlattr *tb[], struct nlattr *data[],
1501                           struct netlink_ext_ack *extack)
1502 {
1503         bool use_udp6_rx_checksums = false;
1504         struct ip_tunnel_info info;
1505         bool ttl_inherit = false;
1506         bool metadata = false;
1507         int err;
1508
1509         init_tnl_info(&info, GENEVE_UDP_PORT);
1510         err = geneve_nl2info(tb, data, extack, &info, &metadata,
1511                              &use_udp6_rx_checksums, &ttl_inherit, false);
1512         if (err)
1513                 return err;
1514
1515         err = geneve_configure(net, dev, extack, &info, metadata,
1516                                use_udp6_rx_checksums, ttl_inherit);
1517         if (err)
1518                 return err;
1519
1520         geneve_link_config(dev, &info, tb);
1521
1522         return 0;
1523 }
1524
1525 /* Quiesces the geneve device data path for both TX and RX.
1526  *
1527  * On transmit geneve checks for non-NULL geneve_sock before it proceeds.
1528  * So, if we set that socket to NULL under RCU and wait for synchronize_net()
1529  * to complete for the existing set of in-flight packets to be transmitted,
1530  * then we would have quiesced the transmit data path. All the future packets
1531  * will get dropped until we unquiesce the data path.
1532  *
1533  * On receive geneve dereference the geneve_sock stashed in the socket. So,
1534  * if we set that to NULL under RCU and wait for synchronize_net() to
1535  * complete, then we would have quiesced the receive data path.
1536  */
1537 static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4,
1538                            struct geneve_sock **gs6)
1539 {
1540         *gs4 = rtnl_dereference(geneve->sock4);
1541         rcu_assign_pointer(geneve->sock4, NULL);
1542         if (*gs4)
1543                 rcu_assign_sk_user_data((*gs4)->sock->sk, NULL);
1544 #if IS_ENABLED(CONFIG_IPV6)
1545         *gs6 = rtnl_dereference(geneve->sock6);
1546         rcu_assign_pointer(geneve->sock6, NULL);
1547         if (*gs6)
1548                 rcu_assign_sk_user_data((*gs6)->sock->sk, NULL);
1549 #else
1550         *gs6 = NULL;
1551 #endif
1552         synchronize_net();
1553 }
1554
1555 /* Resumes the geneve device data path for both TX and RX. */
1556 static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4,
1557                              struct geneve_sock __maybe_unused *gs6)
1558 {
1559         rcu_assign_pointer(geneve->sock4, gs4);
1560         if (gs4)
1561                 rcu_assign_sk_user_data(gs4->sock->sk, gs4);
1562 #if IS_ENABLED(CONFIG_IPV6)
1563         rcu_assign_pointer(geneve->sock6, gs6);
1564         if (gs6)
1565                 rcu_assign_sk_user_data(gs6->sock->sk, gs6);
1566 #endif
1567         synchronize_net();
1568 }
1569
1570 static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
1571                              struct nlattr *data[],
1572                              struct netlink_ext_ack *extack)
1573 {
1574         struct geneve_dev *geneve = netdev_priv(dev);
1575         struct geneve_sock *gs4, *gs6;
1576         struct ip_tunnel_info info;
1577         bool metadata;
1578         bool use_udp6_rx_checksums;
1579         bool ttl_inherit;
1580         int err;
1581
1582         /* If the geneve device is configured for metadata (or externally
1583          * controlled, for example, OVS), then nothing can be changed.
1584          */
1585         if (geneve->collect_md)
1586                 return -EOPNOTSUPP;
1587
1588         /* Start with the existing info. */
1589         memcpy(&info, &geneve->info, sizeof(info));
1590         metadata = geneve->collect_md;
1591         use_udp6_rx_checksums = geneve->use_udp6_rx_checksums;
1592         ttl_inherit = geneve->ttl_inherit;
1593         err = geneve_nl2info(tb, data, extack, &info, &metadata,
1594                              &use_udp6_rx_checksums, &ttl_inherit, true);
1595         if (err)
1596                 return err;
1597
1598         if (!geneve_dst_addr_equal(&geneve->info, &info)) {
1599                 dst_cache_reset(&info.dst_cache);
1600                 geneve_link_config(dev, &info, tb);
1601         }
1602
1603         geneve_quiesce(geneve, &gs4, &gs6);
1604         geneve->info = info;
1605         geneve->collect_md = metadata;
1606         geneve->use_udp6_rx_checksums = use_udp6_rx_checksums;
1607         geneve->ttl_inherit = ttl_inherit;
1608         geneve_unquiesce(geneve, gs4, gs6);
1609
1610         return 0;
1611 }
1612
1613 static void geneve_dellink(struct net_device *dev, struct list_head *head)
1614 {
1615         struct geneve_dev *geneve = netdev_priv(dev);
1616
1617         list_del(&geneve->next);
1618         unregister_netdevice_queue(dev, head);
1619 }
1620
1621 static size_t geneve_get_size(const struct net_device *dev)
1622 {
1623         return nla_total_size(sizeof(__u32)) +  /* IFLA_GENEVE_ID */
1624                 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
1625                 nla_total_size(sizeof(__u8)) +  /* IFLA_GENEVE_TTL */
1626                 nla_total_size(sizeof(__u8)) +  /* IFLA_GENEVE_TOS */
1627                 nla_total_size(sizeof(__be32)) +  /* IFLA_GENEVE_LABEL */
1628                 nla_total_size(sizeof(__be16)) +  /* IFLA_GENEVE_PORT */
1629                 nla_total_size(0) +      /* IFLA_GENEVE_COLLECT_METADATA */
1630                 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */
1631                 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
1632                 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
1633                 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */
1634                 0;
1635 }
1636
1637 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
1638 {
1639         struct geneve_dev *geneve = netdev_priv(dev);
1640         struct ip_tunnel_info *info = &geneve->info;
1641         bool ttl_inherit = geneve->ttl_inherit;
1642         bool metadata = geneve->collect_md;
1643         __u8 tmp_vni[3];
1644         __u32 vni;
1645
1646         tunnel_id_to_vni(info->key.tun_id, tmp_vni);
1647         vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2];
1648         if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
1649                 goto nla_put_failure;
1650
1651         if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
1652                 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
1653                                     info->key.u.ipv4.dst))
1654                         goto nla_put_failure;
1655                 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
1656                                !!(info->key.tun_flags & TUNNEL_CSUM)))
1657                         goto nla_put_failure;
1658
1659 #if IS_ENABLED(CONFIG_IPV6)
1660         } else if (!metadata) {
1661                 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
1662                                      &info->key.u.ipv6.dst))
1663                         goto nla_put_failure;
1664                 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
1665                                !(info->key.tun_flags & TUNNEL_CSUM)))
1666                         goto nla_put_failure;
1667 #endif
1668         }
1669
1670         if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
1671             nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
1672             nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
1673                 goto nla_put_failure;
1674
1675         if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
1676                 goto nla_put_failure;
1677
1678         if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA))
1679                 goto nla_put_failure;
1680
1681 #if IS_ENABLED(CONFIG_IPV6)
1682         if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
1683                        !geneve->use_udp6_rx_checksums))
1684                 goto nla_put_failure;
1685 #endif
1686
1687         if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit))
1688                 goto nla_put_failure;
1689
1690         return 0;
1691
1692 nla_put_failure:
1693         return -EMSGSIZE;
1694 }
1695
1696 static struct rtnl_link_ops geneve_link_ops __read_mostly = {
1697         .kind           = "geneve",
1698         .maxtype        = IFLA_GENEVE_MAX,
1699         .policy         = geneve_policy,
1700         .priv_size      = sizeof(struct geneve_dev),
1701         .setup          = geneve_setup,
1702         .validate       = geneve_validate,
1703         .newlink        = geneve_newlink,
1704         .changelink     = geneve_changelink,
1705         .dellink        = geneve_dellink,
1706         .get_size       = geneve_get_size,
1707         .fill_info      = geneve_fill_info,
1708 };
1709
1710 struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
1711                                         u8 name_assign_type, u16 dst_port)
1712 {
1713         struct nlattr *tb[IFLA_MAX + 1];
1714         struct ip_tunnel_info info;
1715         struct net_device *dev;
1716         LIST_HEAD(list_kill);
1717         int err;
1718
1719         memset(tb, 0, sizeof(tb));
1720         dev = rtnl_create_link(net, name, name_assign_type,
1721                                &geneve_link_ops, tb, NULL);
1722         if (IS_ERR(dev))
1723                 return dev;
1724
1725         init_tnl_info(&info, dst_port);
1726         err = geneve_configure(net, dev, NULL, &info, true, true, false);
1727         if (err) {
1728                 free_netdev(dev);
1729                 return ERR_PTR(err);
1730         }
1731
1732         /* openvswitch users expect packet sizes to be unrestricted,
1733          * so set the largest MTU we can.
1734          */
1735         err = geneve_change_mtu(dev, IP_MAX_MTU);
1736         if (err)
1737                 goto err;
1738
1739         err = rtnl_configure_link(dev, NULL);
1740         if (err < 0)
1741                 goto err;
1742
1743         return dev;
1744 err:
1745         geneve_dellink(dev, &list_kill);
1746         unregister_netdevice_many(&list_kill);
1747         return ERR_PTR(err);
1748 }
1749 EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
1750
1751 static int geneve_netdevice_event(struct notifier_block *unused,
1752                                   unsigned long event, void *ptr)
1753 {
1754         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1755
1756         if (event == NETDEV_UDP_TUNNEL_PUSH_INFO ||
1757             event == NETDEV_UDP_TUNNEL_DROP_INFO) {
1758                 geneve_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO);
1759         } else if (event == NETDEV_UNREGISTER) {
1760                 geneve_offload_rx_ports(dev, false);
1761         } else if (event == NETDEV_REGISTER) {
1762                 geneve_offload_rx_ports(dev, true);
1763         }
1764
1765         return NOTIFY_DONE;
1766 }
1767
/* Notifier block routing netdevice events to geneve_netdevice_event();
 * registered in geneve_init_module() and removed in geneve_cleanup_module().
 */
static struct notifier_block geneve_notifier_block __read_mostly = {
        .notifier_call = geneve_netdevice_event,
};
1771
1772 static __net_init int geneve_init_net(struct net *net)
1773 {
1774         struct geneve_net *gn = net_generic(net, geneve_net_id);
1775
1776         INIT_LIST_HEAD(&gn->geneve_list);
1777         INIT_LIST_HEAD(&gn->sock_list);
1778         return 0;
1779 }
1780
1781 static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
1782 {
1783         struct geneve_net *gn = net_generic(net, geneve_net_id);
1784         struct geneve_dev *geneve, *next;
1785         struct net_device *dev, *aux;
1786
1787         /* gather any geneve devices that were moved into this ns */
1788         for_each_netdev_safe(net, dev, aux)
1789                 if (dev->rtnl_link_ops == &geneve_link_ops)
1790                         unregister_netdevice_queue(dev, head);
1791
1792         /* now gather any other geneve devices that were created in this ns */
1793         list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
1794                 /* If geneve->dev is in the same netns, it was already added
1795                  * to the list by the previous loop.
1796                  */
1797                 if (!net_eq(dev_net(geneve->dev), net))
1798                         unregister_netdevice_queue(geneve->dev, head);
1799         }
1800
1801         WARN_ON_ONCE(!list_empty(&gn->sock_list));
1802 }
1803
1804 static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
1805 {
1806         struct net *net;
1807         LIST_HEAD(list);
1808
1809         rtnl_lock();
1810         list_for_each_entry(net, net_list, exit_list)
1811                 geneve_destroy_tunnels(net, &list);
1812
1813         /* unregister the devices gathered above */
1814         unregister_netdevice_many(&list);
1815         rtnl_unlock();
1816 }
1817
1818 static struct pernet_operations geneve_net_ops = {
1819         .init = geneve_init_net,
1820         .exit_batch = geneve_exit_batch_net,
1821         .id   = &geneve_net_id,
1822         .size = sizeof(struct geneve_net),
1823 };
1824
1825 static int __init geneve_init_module(void)
1826 {
1827         int rc;
1828
1829         rc = register_pernet_subsys(&geneve_net_ops);
1830         if (rc)
1831                 goto out1;
1832
1833         rc = register_netdevice_notifier(&geneve_notifier_block);
1834         if (rc)
1835                 goto out2;
1836
1837         rc = rtnl_link_register(&geneve_link_ops);
1838         if (rc)
1839                 goto out3;
1840
1841         return 0;
1842 out3:
1843         unregister_netdevice_notifier(&geneve_notifier_block);
1844 out2:
1845         unregister_pernet_subsys(&geneve_net_ops);
1846 out1:
1847         return rc;
1848 }
1849 late_initcall(geneve_init_module);
1850
/* Module exit: undo the registrations made by geneve_init_module(), in the
 * reverse of the order in which they were made.
 */
static void __exit geneve_cleanup_module(void)
{
        rtnl_link_unregister(&geneve_link_ops);
        unregister_netdevice_notifier(&geneve_notifier_block);
        unregister_pernet_subsys(&geneve_net_ops);
}
module_exit(geneve_cleanup_module);
1858
/* Module metadata.  The rtnl link alias names the same "geneve" kind that
 * geneve_link_ops registers.
 */
MODULE_LICENSE("GPL");
MODULE_VERSION(GENEVE_NETDEV_VER);
MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
MODULE_ALIAS_RTNL_LINK("geneve");