OSDN Git Service

net: sched: add rcu annotations around qdisc->qdisc_sleeping
[tomoyo/tomoyo-test1.git] / net / sched / sch_teql.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
3  *
4  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
5  */
6
7 #include <linux/module.h>
8 #include <linux/types.h>
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/string.h>
12 #include <linux/errno.h>
13 #include <linux/if_arp.h>
14 #include <linux/netdevice.h>
15 #include <linux/init.h>
16 #include <linux/skbuff.h>
17 #include <linux/moduleparam.h>
18 #include <net/dst.h>
19 #include <net/neighbour.h>
20 #include <net/pkt_sched.h>
21
22 /*
23    How to set it up.
24    -----------------
25
26    After loading this module you will find a new device teqlN
27    and a new qdisc with the same name. To join a slave to the equalizer,
28    just set this qdisc on a device, e.g.:
29
30    # tc qdisc add dev eth0 root teql0
31    # tc qdisc add dev eth1 root teql0
32
33    That's all. Full PnP 8)
34
35    Applicability.
36    --------------
37
38    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
39       signal and generate EOI events. If you want to equalize virtual devices
40       like tunnels, use a normal eql device.
41    2. This device puts no limitations on physical slave characteristics,
42       e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
43       Certainly, a large difference in link speeds will make the resulting
44       equalized link unusable, because of huge packet reordering.
45       I estimate the upper useful difference as ~10 times.
46    3. If the slave requires address resolution, only protocols using
47       neighbour cache (IPv4/IPv6) will work over the equalized link.
48       Other protocols are still allowed to use the slave device directly,
49       which will not break load balancing, though native slave
50       traffic will have the highest priority.  */
51
52 struct teql_master {
53         struct Qdisc_ops qops;
54         struct net_device *dev;
55         struct Qdisc *slaves;
56         struct list_head master_list;
57         unsigned long   tx_bytes;
58         unsigned long   tx_packets;
59         unsigned long   tx_errors;
60         unsigned long   tx_dropped;
61 };
62
63 struct teql_sched_data {
64         struct Qdisc *next;
65         struct teql_master *m;
66         struct sk_buff_head q;
67 };
68
/*
 * Successor of slave qdisc @sch in the circular slave list.  Expands to an
 * lvalue, so it is used both for reading and for relinking the list.
 */
#define NEXT_SLAVE(sch) (((struct teql_sched_data *)qdisc_priv(sch))->next)

/* Device flags the master copies from its slaves. */
#define FMASK (IFF_POINTOPOINT | IFF_BROADCAST)
72
73 /* "teql*" qdisc routines */
74
75 static int
76 teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
77 {
78         struct net_device *dev = qdisc_dev(sch);
79         struct teql_sched_data *q = qdisc_priv(sch);
80
81         if (q->q.qlen < dev->tx_queue_len) {
82                 __skb_queue_tail(&q->q, skb);
83                 return NET_XMIT_SUCCESS;
84         }
85
86         return qdisc_drop(skb, sch, to_free);
87 }
88
89 static struct sk_buff *
90 teql_dequeue(struct Qdisc *sch)
91 {
92         struct teql_sched_data *dat = qdisc_priv(sch);
93         struct netdev_queue *dat_queue;
94         struct sk_buff *skb;
95         struct Qdisc *q;
96
97         skb = __skb_dequeue(&dat->q);
98         dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
99         q = rcu_dereference_bh(dat_queue->qdisc);
100
101         if (skb == NULL) {
102                 struct net_device *m = qdisc_dev(q);
103                 if (m) {
104                         dat->m->slaves = sch;
105                         netif_wake_queue(m);
106                 }
107         } else {
108                 qdisc_bstats_update(sch, skb);
109         }
110         sch->q.qlen = dat->q.qlen + q->q.qlen;
111         return skb;
112 }
113
114 static struct sk_buff *
115 teql_peek(struct Qdisc *sch)
116 {
117         /* teql is meant to be used as root qdisc */
118         return NULL;
119 }
120
121 static void
122 teql_reset(struct Qdisc *sch)
123 {
124         struct teql_sched_data *dat = qdisc_priv(sch);
125
126         skb_queue_purge(&dat->q);
127 }
128
129 static void
130 teql_destroy(struct Qdisc *sch)
131 {
132         struct Qdisc *q, *prev;
133         struct teql_sched_data *dat = qdisc_priv(sch);
134         struct teql_master *master = dat->m;
135
136         if (!master)
137                 return;
138
139         prev = master->slaves;
140         if (prev) {
141                 do {
142                         q = NEXT_SLAVE(prev);
143                         if (q == sch) {
144                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
145                                 if (q == master->slaves) {
146                                         master->slaves = NEXT_SLAVE(q);
147                                         if (q == master->slaves) {
148                                                 struct netdev_queue *txq;
149                                                 spinlock_t *root_lock;
150
151                                                 txq = netdev_get_tx_queue(master->dev, 0);
152                                                 master->slaves = NULL;
153
154                                                 root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
155                                                 spin_lock_bh(root_lock);
156                                                 qdisc_reset(rtnl_dereference(txq->qdisc));
157                                                 spin_unlock_bh(root_lock);
158                                         }
159                                 }
160                                 skb_queue_purge(&dat->q);
161                                 break;
162                         }
163
164                 } while ((prev = q) != master->slaves);
165         }
166 }
167
168 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
169                            struct netlink_ext_ack *extack)
170 {
171         struct net_device *dev = qdisc_dev(sch);
172         struct teql_master *m = (struct teql_master *)sch->ops;
173         struct teql_sched_data *q = qdisc_priv(sch);
174
175         if (dev->hard_header_len > m->dev->hard_header_len)
176                 return -EINVAL;
177
178         if (m->dev == dev)
179                 return -ELOOP;
180
181         q->m = m;
182
183         skb_queue_head_init(&q->q);
184
185         if (m->slaves) {
186                 if (m->dev->flags & IFF_UP) {
187                         if ((m->dev->flags & IFF_POINTOPOINT &&
188                              !(dev->flags & IFF_POINTOPOINT)) ||
189                             (m->dev->flags & IFF_BROADCAST &&
190                              !(dev->flags & IFF_BROADCAST)) ||
191                             (m->dev->flags & IFF_MULTICAST &&
192                              !(dev->flags & IFF_MULTICAST)) ||
193                             dev->mtu < m->dev->mtu)
194                                 return -EINVAL;
195                 } else {
196                         if (!(dev->flags&IFF_POINTOPOINT))
197                                 m->dev->flags &= ~IFF_POINTOPOINT;
198                         if (!(dev->flags&IFF_BROADCAST))
199                                 m->dev->flags &= ~IFF_BROADCAST;
200                         if (!(dev->flags&IFF_MULTICAST))
201                                 m->dev->flags &= ~IFF_MULTICAST;
202                         if (dev->mtu < m->dev->mtu)
203                                 m->dev->mtu = dev->mtu;
204                 }
205                 q->next = NEXT_SLAVE(m->slaves);
206                 NEXT_SLAVE(m->slaves) = sch;
207         } else {
208                 q->next = sch;
209                 m->slaves = sch;
210                 m->dev->mtu = dev->mtu;
211                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
212         }
213         return 0;
214 }
215
216
217 static int
218 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
219                struct net_device *dev, struct netdev_queue *txq,
220                struct dst_entry *dst)
221 {
222         struct neighbour *n;
223         int err = 0;
224
225         n = dst_neigh_lookup_skb(dst, skb);
226         if (!n)
227                 return -ENOENT;
228
229         if (dst->dev != dev) {
230                 struct neighbour *mn;
231
232                 mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
233                 neigh_release(n);
234                 if (IS_ERR(mn))
235                         return PTR_ERR(mn);
236                 n = mn;
237         }
238
239         if (neigh_event_send(n, skb_res) == 0) {
240                 int err;
241                 char haddr[MAX_ADDR_LEN];
242
243                 neigh_ha_snapshot(haddr, n, dev);
244                 err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
245                                       haddr, NULL, skb->len);
246
247                 if (err < 0)
248                         err = -EINVAL;
249         } else {
250                 err = (skb_res == NULL) ? -EAGAIN : 1;
251         }
252         neigh_release(n);
253         return err;
254 }
255
256 static inline int teql_resolve(struct sk_buff *skb,
257                                struct sk_buff *skb_res,
258                                struct net_device *dev,
259                                struct netdev_queue *txq)
260 {
261         struct dst_entry *dst = skb_dst(skb);
262         int res;
263
264         if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
265                 return -ENODEV;
266
267         if (!dev->header_ops || !dst)
268                 return 0;
269
270         rcu_read_lock();
271         res = __teql_resolve(skb, skb_res, dev, txq, dst);
272         rcu_read_unlock();
273
274         return res;
275 }
276
277 static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
278 {
279         struct teql_master *master = netdev_priv(dev);
280         struct Qdisc *start, *q;
281         int busy;
282         int nores;
283         int subq = skb_get_queue_mapping(skb);
284         struct sk_buff *skb_res = NULL;
285
286         start = master->slaves;
287
288 restart:
289         nores = 0;
290         busy = 0;
291
292         q = start;
293         if (!q)
294                 goto drop;
295
296         do {
297                 struct net_device *slave = qdisc_dev(q);
298                 struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
299
300                 if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q)
301                         continue;
302                 if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
303                     !netif_running(slave)) {
304                         busy = 1;
305                         continue;
306                 }
307
308                 switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
309                 case 0:
310                         if (__netif_tx_trylock(slave_txq)) {
311                                 unsigned int length = qdisc_pkt_len(skb);
312
313                                 if (!netif_xmit_frozen_or_stopped(slave_txq) &&
314                                     netdev_start_xmit(skb, slave, slave_txq, false) ==
315                                     NETDEV_TX_OK) {
316                                         __netif_tx_unlock(slave_txq);
317                                         master->slaves = NEXT_SLAVE(q);
318                                         netif_wake_queue(dev);
319                                         master->tx_packets++;
320                                         master->tx_bytes += length;
321                                         return NETDEV_TX_OK;
322                                 }
323                                 __netif_tx_unlock(slave_txq);
324                         }
325                         if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
326                                 busy = 1;
327                         break;
328                 case 1:
329                         master->slaves = NEXT_SLAVE(q);
330                         return NETDEV_TX_OK;
331                 default:
332                         nores = 1;
333                         break;
334                 }
335                 __skb_pull(skb, skb_network_offset(skb));
336         } while ((q = NEXT_SLAVE(q)) != start);
337
338         if (nores && skb_res == NULL) {
339                 skb_res = skb;
340                 goto restart;
341         }
342
343         if (busy) {
344                 netif_stop_queue(dev);
345                 return NETDEV_TX_BUSY;
346         }
347         master->tx_errors++;
348
349 drop:
350         master->tx_dropped++;
351         dev_kfree_skb(skb);
352         return NETDEV_TX_OK;
353 }
354
355 static int teql_master_open(struct net_device *dev)
356 {
357         struct Qdisc *q;
358         struct teql_master *m = netdev_priv(dev);
359         int mtu = 0xFFFE;
360         unsigned int flags = IFF_NOARP | IFF_MULTICAST;
361
362         if (m->slaves == NULL)
363                 return -EUNATCH;
364
365         flags = FMASK;
366
367         q = m->slaves;
368         do {
369                 struct net_device *slave = qdisc_dev(q);
370
371                 if (slave == NULL)
372                         return -EUNATCH;
373
374                 if (slave->mtu < mtu)
375                         mtu = slave->mtu;
376                 if (slave->hard_header_len > LL_MAX_HEADER)
377                         return -EINVAL;
378
379                 /* If all the slaves are BROADCAST, master is BROADCAST
380                    If all the slaves are PtP, master is PtP
381                    Otherwise, master is NBMA.
382                  */
383                 if (!(slave->flags&IFF_POINTOPOINT))
384                         flags &= ~IFF_POINTOPOINT;
385                 if (!(slave->flags&IFF_BROADCAST))
386                         flags &= ~IFF_BROADCAST;
387                 if (!(slave->flags&IFF_MULTICAST))
388                         flags &= ~IFF_MULTICAST;
389         } while ((q = NEXT_SLAVE(q)) != m->slaves);
390
391         m->dev->mtu = mtu;
392         m->dev->flags = (m->dev->flags&~FMASK) | flags;
393         netif_start_queue(m->dev);
394         return 0;
395 }
396
/* Take the master device down: stop its transmit queue. Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);

        return 0;
}
402
403 static void teql_master_stats64(struct net_device *dev,
404                                 struct rtnl_link_stats64 *stats)
405 {
406         struct teql_master *m = netdev_priv(dev);
407
408         stats->tx_packets       = m->tx_packets;
409         stats->tx_bytes         = m->tx_bytes;
410         stats->tx_errors        = m->tx_errors;
411         stats->tx_dropped       = m->tx_dropped;
412 }
413
414 static int teql_master_mtu(struct net_device *dev, int new_mtu)
415 {
416         struct teql_master *m = netdev_priv(dev);
417         struct Qdisc *q;
418
419         q = m->slaves;
420         if (q) {
421                 do {
422                         if (new_mtu > qdisc_dev(q)->mtu)
423                                 return -EINVAL;
424                 } while ((q = NEXT_SLAVE(q)) != m->slaves);
425         }
426
427         dev->mtu = new_mtu;
428         return 0;
429 }
430
431 static const struct net_device_ops teql_netdev_ops = {
432         .ndo_open       = teql_master_open,
433         .ndo_stop       = teql_master_close,
434         .ndo_start_xmit = teql_master_xmit,
435         .ndo_get_stats64 = teql_master_stats64,
436         .ndo_change_mtu = teql_master_mtu,
437 };
438
439 static __init void teql_master_setup(struct net_device *dev)
440 {
441         struct teql_master *master = netdev_priv(dev);
442         struct Qdisc_ops *ops = &master->qops;
443
444         master->dev     = dev;
445         ops->priv_size  = sizeof(struct teql_sched_data);
446
447         ops->enqueue    =       teql_enqueue;
448         ops->dequeue    =       teql_dequeue;
449         ops->peek       =       teql_peek;
450         ops->init       =       teql_qdisc_init;
451         ops->reset      =       teql_reset;
452         ops->destroy    =       teql_destroy;
453         ops->owner      =       THIS_MODULE;
454
455         dev->netdev_ops =       &teql_netdev_ops;
456         dev->type               = ARPHRD_VOID;
457         dev->mtu                = 1500;
458         dev->min_mtu            = 68;
459         dev->max_mtu            = 65535;
460         dev->tx_queue_len       = 100;
461         dev->flags              = IFF_NOARP;
462         dev->hard_header_len    = LL_MAX_HEADER;
463         netif_keep_dst(dev);
464 }
465
466 static LIST_HEAD(master_dev_list);
467 static int max_equalizers = 1;
468 module_param(max_equalizers, int, 0);
469 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
470
471 static int __init teql_init(void)
472 {
473         int i;
474         int err = -ENODEV;
475
476         for (i = 0; i < max_equalizers; i++) {
477                 struct net_device *dev;
478                 struct teql_master *master;
479
480                 dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
481                                    NET_NAME_UNKNOWN, teql_master_setup);
482                 if (!dev) {
483                         err = -ENOMEM;
484                         break;
485                 }
486
487                 if ((err = register_netdev(dev))) {
488                         free_netdev(dev);
489                         break;
490                 }
491
492                 master = netdev_priv(dev);
493
494                 strscpy(master->qops.id, dev->name, IFNAMSIZ);
495                 err = register_qdisc(&master->qops);
496
497                 if (err) {
498                         unregister_netdev(dev);
499                         free_netdev(dev);
500                         break;
501                 }
502
503                 list_add_tail(&master->master_list, &master_dev_list);
504         }
505         return i ? 0 : err;
506 }
507
508 static void __exit teql_exit(void)
509 {
510         struct teql_master *master, *nxt;
511
512         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
513
514                 list_del(&master->master_list);
515
516                 unregister_qdisc(&master->qops);
517                 unregister_netdev(master->dev);
518                 free_netdev(master->dev);
519         }
520 }
521
522 module_init(teql_init);
523 module_exit(teql_exit);
524
525 MODULE_LICENSE("GPL");