OSDN Git Service

net: sched: Add policy validation for tc attributes
[tomoyo/tomoyo-test1.git] / net / sched / sch_api.c
1 /*
2  * net/sched/sch_api.c  Packet scheduler API.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Fixes:
12  *
13  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16  */
17
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/lockdep.h>
31 #include <linux/slab.h>
32 #include <linux/hashtable.h>
33
34 #include <net/net_namespace.h>
35 #include <net/sock.h>
36 #include <net/netlink.h>
37 #include <net/pkt_sched.h>
38 #include <net/pkt_cls.h>
39
40 /*
41
42    Short review.
43    -------------
44
45    This file consists of two interrelated parts:
46
47    1. queueing disciplines manager frontend.
48    2. traffic classes manager frontend.
49
50    Generally, queueing discipline ("qdisc") is a black box,
51    which is able to enqueue packets and to dequeue them (when
52    device is ready to send something) in order and at times
53    determined by algorithm hidden in it.
54
55    qdiscs are divided into two categories:
56    - "queues", which have no internal structure visible from outside.
57    - "schedulers", which split all the packets to "traffic classes",
58      using "packet classifiers" (look at cls_api.c)
59
60    In turn, classes may have child qdiscs (as rule, queues)
61    attached to them etc. etc. etc.
62
63    The goal of the routines in this file is to translate the
64    information supplied by the user in the form of handles
65    into a form more intelligible to the kernel, to perform sanity
66    checks and the part of the work that is common to all qdiscs,
67    and to provide rtnetlink notifications.
68
69    All real intelligent work is done inside qdisc modules.
70
71
72
73    Every discipline has two major routines: enqueue and dequeue.
74
75    ---dequeue
76
77    dequeue usually returns a skb to send. It is allowed to return NULL,
78    but it does not mean that queue is empty, it just means that
79    discipline does not want to send anything this time.
80    Queue is really empty if q->q.qlen == 0.
81    For complicated disciplines with multiple queues q->q is not
82    real packet queue, but however q->q.qlen must be valid.
83
84    ---enqueue
85
86    enqueue returns 0 if the packet was enqueued successfully.
87    If a packet (this one or another one) was dropped, it returns
88    a non-zero error code.
89    NET_XMIT_DROP        - this packet dropped
90      Expected action: do not backoff, but wait until queue will clear.
91    NET_XMIT_CN          - probably this packet enqueued, but another one dropped.
92      Expected action: backoff or ignore
93
94    Auxiliary routines:
95
96    ---peek
97
98    like dequeue but without removing a packet from the queue
99
100    ---reset
101
102    returns qdisc to initial state: purge all buffers, clear all
103    timers, counters (except for statistics) etc.
104
105    ---init
106
107    initializes newly created qdisc.
108
109    ---destroy
110
111    destroys resources allocated by init and during lifetime of qdisc.
112
113    ---change
114
115    changes qdisc parameters.
116  */
117
118 /* Protects list of registered TC modules. It is pure SMP lock. */
119 static DEFINE_RWLOCK(qdisc_mod_lock);
120
121
122 /************************************************
123  *      Queueing disciplines manipulation.      *
124  ************************************************/
125
126
127 /* The list of all installed queueing disciplines. */
128
129 static struct Qdisc_ops *qdisc_base;
130
131 /* Register/unregister queueing discipline */
132
133 int register_qdisc(struct Qdisc_ops *qops)
134 {
135         struct Qdisc_ops *q, **qp;
136         int rc = -EEXIST;
137
138         write_lock(&qdisc_mod_lock);
139         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
140                 if (!strcmp(qops->id, q->id))
141                         goto out;
142
143         if (qops->enqueue == NULL)
144                 qops->enqueue = noop_qdisc_ops.enqueue;
145         if (qops->peek == NULL) {
146                 if (qops->dequeue == NULL)
147                         qops->peek = noop_qdisc_ops.peek;
148                 else
149                         goto out_einval;
150         }
151         if (qops->dequeue == NULL)
152                 qops->dequeue = noop_qdisc_ops.dequeue;
153
154         if (qops->cl_ops) {
155                 const struct Qdisc_class_ops *cops = qops->cl_ops;
156
157                 if (!(cops->find && cops->walk && cops->leaf))
158                         goto out_einval;
159
160                 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
161                         goto out_einval;
162         }
163
164         qops->next = NULL;
165         *qp = qops;
166         rc = 0;
167 out:
168         write_unlock(&qdisc_mod_lock);
169         return rc;
170
171 out_einval:
172         rc = -EINVAL;
173         goto out;
174 }
175 EXPORT_SYMBOL(register_qdisc);
176
177 int unregister_qdisc(struct Qdisc_ops *qops)
178 {
179         struct Qdisc_ops *q, **qp;
180         int err = -ENOENT;
181
182         write_lock(&qdisc_mod_lock);
183         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
184                 if (q == qops)
185                         break;
186         if (q) {
187                 *qp = q->next;
188                 q->next = NULL;
189                 err = 0;
190         }
191         write_unlock(&qdisc_mod_lock);
192         return err;
193 }
194 EXPORT_SYMBOL(unregister_qdisc);
195
196 /* Get default qdisc if not otherwise specified */
197 void qdisc_get_default(char *name, size_t len)
198 {
199         read_lock(&qdisc_mod_lock);
200         strlcpy(name, default_qdisc_ops->id, len);
201         read_unlock(&qdisc_mod_lock);
202 }
203
204 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
205 {
206         struct Qdisc_ops *q = NULL;
207
208         for (q = qdisc_base; q; q = q->next) {
209                 if (!strcmp(name, q->id)) {
210                         if (!try_module_get(q->owner))
211                                 q = NULL;
212                         break;
213                 }
214         }
215
216         return q;
217 }
218
219 /* Set new default qdisc to use */
220 int qdisc_set_default(const char *name)
221 {
222         const struct Qdisc_ops *ops;
223
224         if (!capable(CAP_NET_ADMIN))
225                 return -EPERM;
226
227         write_lock(&qdisc_mod_lock);
228         ops = qdisc_lookup_default(name);
229         if (!ops) {
230                 /* Not found, drop lock and try to load module */
231                 write_unlock(&qdisc_mod_lock);
232                 request_module("sch_%s", name);
233                 write_lock(&qdisc_mod_lock);
234
235                 ops = qdisc_lookup_default(name);
236         }
237
238         if (ops) {
239                 /* Set new default */
240                 module_put(default_qdisc_ops->owner);
241                 default_qdisc_ops = ops;
242         }
243         write_unlock(&qdisc_mod_lock);
244
245         return ops ? 0 : -ENOENT;
246 }
247
#ifdef CONFIG_NET_SCH_DEFAULT
/* Late initcall: install the default qdisc named by the kernel
 * configuration (CONFIG_DEFAULT_NET_SCH).  The result of
 * qdisc_set_default() is returned unchanged.
 */
static int __init sch_default_qdisc(void)
{
	int err = qdisc_set_default(CONFIG_DEFAULT_NET_SCH);

	return err;
}
late_initcall(sch_default_qdisc);
#endif
256
257 /* We know handle. Find qdisc among all qdisc's attached to device
258  * (root qdisc, all its children, children of children etc.)
259  * Note: caller either uses rtnl or rcu_read_lock()
260  */
261
262 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
263 {
264         struct Qdisc *q;
265
266         if (!qdisc_dev(root))
267                 return (root->handle == handle ? root : NULL);
268
269         if (!(root->flags & TCQ_F_BUILTIN) &&
270             root->handle == handle)
271                 return root;
272
273         hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
274                 if (q->handle == handle)
275                         return q;
276         }
277         return NULL;
278 }
279
280 void qdisc_hash_add(struct Qdisc *q, bool invisible)
281 {
282         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
283                 ASSERT_RTNL();
284                 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
285                 if (invisible)
286                         q->flags |= TCQ_F_INVISIBLE;
287         }
288 }
289 EXPORT_SYMBOL(qdisc_hash_add);
290
291 void qdisc_hash_del(struct Qdisc *q)
292 {
293         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
294                 ASSERT_RTNL();
295                 hash_del_rcu(&q->hash);
296         }
297 }
298 EXPORT_SYMBOL(qdisc_hash_del);
299
300 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
301 {
302         struct Qdisc *q;
303
304         if (!handle)
305                 return NULL;
306         q = qdisc_match_from_root(dev->qdisc, handle);
307         if (q)
308                 goto out;
309
310         if (dev_ingress_queue(dev))
311                 q = qdisc_match_from_root(
312                         dev_ingress_queue(dev)->qdisc_sleeping,
313                         handle);
314 out:
315         return q;
316 }
317
318 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
319 {
320         unsigned long cl;
321         struct Qdisc *leaf;
322         const struct Qdisc_class_ops *cops = p->ops->cl_ops;
323
324         if (cops == NULL)
325                 return NULL;
326         cl = cops->find(p, classid);
327
328         if (cl == 0)
329                 return NULL;
330         leaf = cops->leaf(p, cl);
331         return leaf;
332 }
333
334 /* Find queueing discipline by name */
335
336 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
337 {
338         struct Qdisc_ops *q = NULL;
339
340         if (kind) {
341                 read_lock(&qdisc_mod_lock);
342                 for (q = qdisc_base; q; q = q->next) {
343                         if (nla_strcmp(kind, q->id) == 0) {
344                                 if (!try_module_get(q->owner))
345                                         q = NULL;
346                                 break;
347                         }
348                 }
349                 read_unlock(&qdisc_mod_lock);
350         }
351         return q;
352 }
353
354 /* The linklayer setting were not transferred from iproute2, in older
355  * versions, and the rate tables lookup systems have been dropped in
356  * the kernel. To keep backward compatible with older iproute2 tc
357  * utils, we detect the linklayer setting by detecting if the rate
358  * table were modified.
359  *
360  * For linklayer ATM table entries, the rate table will be aligned to
361  * 48 bytes, thus some table entries will contain the same value.  The
362  * mpu (min packet unit) is also encoded into the old rate table, thus
363  * starting from the mpu, we find low and high table entries for
364  * mapping this cell.  If these entries contain the same value, when
365  * the rate tables have been modified for linklayer ATM.
366  *
367  * This is done by rounding mpu to the nearest 48 bytes cell/entry,
368  * and then roundup to the next cell, calc the table entry one below,
369  * and compare.
370  */
371 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
372 {
373         int low       = roundup(r->mpu, 48);
374         int high      = roundup(low+1, 48);
375         int cell_low  = low >> r->cell_log;
376         int cell_high = (high >> r->cell_log) - 1;
377
378         /* rtab is too inaccurate at rates > 100Mbit/s */
379         if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
380                 pr_debug("TC linklayer: Giving up ATM detection\n");
381                 return TC_LINKLAYER_ETHERNET;
382         }
383
384         if ((cell_high > cell_low) && (cell_high < 256)
385             && (rtab[cell_low] == rtab[cell_high])) {
386                 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
387                          cell_low, cell_high, rtab[cell_high]);
388                 return TC_LINKLAYER_ATM;
389         }
390         return TC_LINKLAYER_ETHERNET;
391 }
392
393 static struct qdisc_rate_table *qdisc_rtab_list;
394
395 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
396                                         struct nlattr *tab,
397                                         struct netlink_ext_ack *extack)
398 {
399         struct qdisc_rate_table *rtab;
400
401         if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
402             nla_len(tab) != TC_RTAB_SIZE) {
403                 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
404                 return NULL;
405         }
406
407         for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
408                 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
409                     !memcmp(&rtab->data, nla_data(tab), 1024)) {
410                         rtab->refcnt++;
411                         return rtab;
412                 }
413         }
414
415         rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
416         if (rtab) {
417                 rtab->rate = *r;
418                 rtab->refcnt = 1;
419                 memcpy(rtab->data, nla_data(tab), 1024);
420                 if (r->linklayer == TC_LINKLAYER_UNAWARE)
421                         r->linklayer = __detect_linklayer(r, rtab->data);
422                 rtab->next = qdisc_rtab_list;
423                 qdisc_rtab_list = rtab;
424         } else {
425                 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
426         }
427         return rtab;
428 }
429 EXPORT_SYMBOL(qdisc_get_rtab);
430
431 void qdisc_put_rtab(struct qdisc_rate_table *tab)
432 {
433         struct qdisc_rate_table *rtab, **rtabp;
434
435         if (!tab || --tab->refcnt)
436                 return;
437
438         for (rtabp = &qdisc_rtab_list;
439              (rtab = *rtabp) != NULL;
440              rtabp = &rtab->next) {
441                 if (rtab == tab) {
442                         *rtabp = rtab->next;
443                         kfree(rtab);
444                         return;
445                 }
446         }
447 }
448 EXPORT_SYMBOL(qdisc_put_rtab);
449
450 static LIST_HEAD(qdisc_stab_list);
451
452 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
453         [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
454         [TCA_STAB_DATA] = { .type = NLA_BINARY },
455 };
456
457 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
458                                                struct netlink_ext_ack *extack)
459 {
460         struct nlattr *tb[TCA_STAB_MAX + 1];
461         struct qdisc_size_table *stab;
462         struct tc_sizespec *s;
463         unsigned int tsize = 0;
464         u16 *tab = NULL;
465         int err;
466
467         err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack);
468         if (err < 0)
469                 return ERR_PTR(err);
470         if (!tb[TCA_STAB_BASE]) {
471                 NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
472                 return ERR_PTR(-EINVAL);
473         }
474
475         s = nla_data(tb[TCA_STAB_BASE]);
476
477         if (s->tsize > 0) {
478                 if (!tb[TCA_STAB_DATA]) {
479                         NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
480                         return ERR_PTR(-EINVAL);
481                 }
482                 tab = nla_data(tb[TCA_STAB_DATA]);
483                 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
484         }
485
486         if (tsize != s->tsize || (!tab && tsize > 0)) {
487                 NL_SET_ERR_MSG(extack, "Invalid size of size table");
488                 return ERR_PTR(-EINVAL);
489         }
490
491         list_for_each_entry(stab, &qdisc_stab_list, list) {
492                 if (memcmp(&stab->szopts, s, sizeof(*s)))
493                         continue;
494                 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
495                         continue;
496                 stab->refcnt++;
497                 return stab;
498         }
499
500         stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
501         if (!stab)
502                 return ERR_PTR(-ENOMEM);
503
504         stab->refcnt = 1;
505         stab->szopts = *s;
506         if (tsize > 0)
507                 memcpy(stab->data, tab, tsize * sizeof(u16));
508
509         list_add_tail(&stab->list, &qdisc_stab_list);
510
511         return stab;
512 }
513
514 static void stab_kfree_rcu(struct rcu_head *head)
515 {
516         kfree(container_of(head, struct qdisc_size_table, rcu));
517 }
518
519 void qdisc_put_stab(struct qdisc_size_table *tab)
520 {
521         if (!tab)
522                 return;
523
524         if (--tab->refcnt == 0) {
525                 list_del(&tab->list);
526                 call_rcu_bh(&tab->rcu, stab_kfree_rcu);
527         }
528 }
529 EXPORT_SYMBOL(qdisc_put_stab);
530
531 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
532 {
533         struct nlattr *nest;
534
535         nest = nla_nest_start(skb, TCA_STAB);
536         if (nest == NULL)
537                 goto nla_put_failure;
538         if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
539                 goto nla_put_failure;
540         nla_nest_end(skb, nest);
541
542         return skb->len;
543
544 nla_put_failure:
545         return -1;
546 }
547
548 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
549                                const struct qdisc_size_table *stab)
550 {
551         int pkt_len, slot;
552
553         pkt_len = skb->len + stab->szopts.overhead;
554         if (unlikely(!stab->szopts.tsize))
555                 goto out;
556
557         slot = pkt_len + stab->szopts.cell_align;
558         if (unlikely(slot < 0))
559                 slot = 0;
560
561         slot >>= stab->szopts.cell_log;
562         if (likely(slot < stab->szopts.tsize))
563                 pkt_len = stab->data[slot];
564         else
565                 pkt_len = stab->data[stab->szopts.tsize - 1] *
566                                 (slot / stab->szopts.tsize) +
567                                 stab->data[slot % stab->szopts.tsize];
568
569         pkt_len <<= stab->szopts.size_log;
570 out:
571         if (unlikely(pkt_len < 1))
572                 pkt_len = 1;
573         qdisc_skb_cb(skb)->pkt_len = pkt_len;
574 }
575 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
576
577 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
578 {
579         if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
580                 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
581                         txt, qdisc->ops->id, qdisc->handle >> 16);
582                 qdisc->flags |= TCQ_F_WARN_NONWC;
583         }
584 }
585 EXPORT_SYMBOL(qdisc_warn_nonwc);
586
587 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
588 {
589         struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
590                                                  timer);
591
592         rcu_read_lock();
593         __netif_schedule(qdisc_root(wd->qdisc));
594         rcu_read_unlock();
595
596         return HRTIMER_NORESTART;
597 }
598
599 void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
600                                  clockid_t clockid)
601 {
602         hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
603         wd->timer.function = qdisc_watchdog;
604         wd->qdisc = qdisc;
605 }
606 EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
607
608 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
609 {
610         qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
611 }
612 EXPORT_SYMBOL(qdisc_watchdog_init);
613
614 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
615 {
616         if (test_bit(__QDISC_STATE_DEACTIVATED,
617                      &qdisc_root_sleeping(wd->qdisc)->state))
618                 return;
619
620         if (wd->last_expires == expires)
621                 return;
622
623         wd->last_expires = expires;
624         hrtimer_start(&wd->timer,
625                       ns_to_ktime(expires),
626                       HRTIMER_MODE_ABS_PINNED);
627 }
628 EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
629
630 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
631 {
632         hrtimer_cancel(&wd->timer);
633 }
634 EXPORT_SYMBOL(qdisc_watchdog_cancel);
635
636 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
637 {
638         struct hlist_head *h;
639         unsigned int i;
640
641         h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
642
643         if (h != NULL) {
644                 for (i = 0; i < n; i++)
645                         INIT_HLIST_HEAD(&h[i]);
646         }
647         return h;
648 }
649
650 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
651 {
652         struct Qdisc_class_common *cl;
653         struct hlist_node *next;
654         struct hlist_head *nhash, *ohash;
655         unsigned int nsize, nmask, osize;
656         unsigned int i, h;
657
658         /* Rehash when load factor exceeds 0.75 */
659         if (clhash->hashelems * 4 <= clhash->hashsize * 3)
660                 return;
661         nsize = clhash->hashsize * 2;
662         nmask = nsize - 1;
663         nhash = qdisc_class_hash_alloc(nsize);
664         if (nhash == NULL)
665                 return;
666
667         ohash = clhash->hash;
668         osize = clhash->hashsize;
669
670         sch_tree_lock(sch);
671         for (i = 0; i < osize; i++) {
672                 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
673                         h = qdisc_class_hash(cl->classid, nmask);
674                         hlist_add_head(&cl->hnode, &nhash[h]);
675                 }
676         }
677         clhash->hash     = nhash;
678         clhash->hashsize = nsize;
679         clhash->hashmask = nmask;
680         sch_tree_unlock(sch);
681
682         kvfree(ohash);
683 }
684 EXPORT_SYMBOL(qdisc_class_hash_grow);
685
686 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
687 {
688         unsigned int size = 4;
689
690         clhash->hash = qdisc_class_hash_alloc(size);
691         if (!clhash->hash)
692                 return -ENOMEM;
693         clhash->hashsize  = size;
694         clhash->hashmask  = size - 1;
695         clhash->hashelems = 0;
696         return 0;
697 }
698 EXPORT_SYMBOL(qdisc_class_hash_init);
699
700 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
701 {
702         kvfree(clhash->hash);
703 }
704 EXPORT_SYMBOL(qdisc_class_hash_destroy);
705
706 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
707                              struct Qdisc_class_common *cl)
708 {
709         unsigned int h;
710
711         INIT_HLIST_NODE(&cl->hnode);
712         h = qdisc_class_hash(cl->classid, clhash->hashmask);
713         hlist_add_head(&cl->hnode, &clhash->hash[h]);
714         clhash->hashelems++;
715 }
716 EXPORT_SYMBOL(qdisc_class_hash_insert);
717
718 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
719                              struct Qdisc_class_common *cl)
720 {
721         hlist_del(&cl->hnode);
722         clhash->hashelems--;
723 }
724 EXPORT_SYMBOL(qdisc_class_hash_remove);
725
726 /* Allocate an unique handle from space managed by kernel
727  * Possible range is [8000-FFFF]:0000 (0x8000 values)
728  */
729 static u32 qdisc_alloc_handle(struct net_device *dev)
730 {
731         int i = 0x8000;
732         static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
733
734         do {
735                 autohandle += TC_H_MAKE(0x10000U, 0);
736                 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
737                         autohandle = TC_H_MAKE(0x80000000U, 0);
738                 if (!qdisc_lookup(dev, autohandle))
739                         return autohandle;
740                 cond_resched();
741         } while (--i > 0);
742
743         return 0;
744 }
745
746 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
747                                unsigned int len)
748 {
749         bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
750         const struct Qdisc_class_ops *cops;
751         unsigned long cl;
752         u32 parentid;
753         bool notify;
754         int drops;
755
756         if (n == 0 && len == 0)
757                 return;
758         drops = max_t(int, n, 0);
759         rcu_read_lock();
760         while ((parentid = sch->parent)) {
761                 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
762                         break;
763
764                 if (sch->flags & TCQ_F_NOPARENT)
765                         break;
766                 /* Notify parent qdisc only if child qdisc becomes empty.
767                  *
768                  * If child was empty even before update then backlog
769                  * counter is screwed and we skip notification because
770                  * parent class is already passive.
771                  *
772                  * If the original child was offloaded then it is allowed
773                  * to be seem as empty, so the parent is notified anyway.
774                  */
775                 notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
776                                                        !qdisc_is_offloaded);
777                 /* TODO: perform the search on a per txq basis */
778                 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
779                 if (sch == NULL) {
780                         WARN_ON_ONCE(parentid != TC_H_ROOT);
781                         break;
782                 }
783                 cops = sch->ops->cl_ops;
784                 if (notify && cops->qlen_notify) {
785                         cl = cops->find(sch, parentid);
786                         cops->qlen_notify(sch, cl);
787                 }
788                 sch->q.qlen -= n;
789                 sch->qstats.backlog -= len;
790                 __qdisc_qstats_drop(sch, drops);
791         }
792         rcu_read_unlock();
793 }
794 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
795
796 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
797                          u32 portid, u32 seq, u16 flags, int event)
798 {
799         struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
800         struct gnet_stats_queue __percpu *cpu_qstats = NULL;
801         struct tcmsg *tcm;
802         struct nlmsghdr  *nlh;
803         unsigned char *b = skb_tail_pointer(skb);
804         struct gnet_dump d;
805         struct qdisc_size_table *stab;
806         u32 block_index;
807         __u32 qlen;
808
809         cond_resched();
810         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
811         if (!nlh)
812                 goto out_nlmsg_trim;
813         tcm = nlmsg_data(nlh);
814         tcm->tcm_family = AF_UNSPEC;
815         tcm->tcm__pad1 = 0;
816         tcm->tcm__pad2 = 0;
817         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
818         tcm->tcm_parent = clid;
819         tcm->tcm_handle = q->handle;
820         tcm->tcm_info = refcount_read(&q->refcnt);
821         if (nla_put_string(skb, TCA_KIND, q->ops->id))
822                 goto nla_put_failure;
823         if (q->ops->ingress_block_get) {
824                 block_index = q->ops->ingress_block_get(q);
825                 if (block_index &&
826                     nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
827                         goto nla_put_failure;
828         }
829         if (q->ops->egress_block_get) {
830                 block_index = q->ops->egress_block_get(q);
831                 if (block_index &&
832                     nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
833                         goto nla_put_failure;
834         }
835         if (q->ops->dump && q->ops->dump(q, skb) < 0)
836                 goto nla_put_failure;
837         if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
838                 goto nla_put_failure;
839         qlen = qdisc_qlen_sum(q);
840
841         stab = rtnl_dereference(q->stab);
842         if (stab && qdisc_dump_stab(skb, stab) < 0)
843                 goto nla_put_failure;
844
845         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
846                                          NULL, &d, TCA_PAD) < 0)
847                 goto nla_put_failure;
848
849         if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
850                 goto nla_put_failure;
851
852         if (qdisc_is_percpu_stats(q)) {
853                 cpu_bstats = q->cpu_bstats;
854                 cpu_qstats = q->cpu_qstats;
855         }
856
857         if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
858                                   &d, cpu_bstats, &q->bstats) < 0 ||
859             gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
860             gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
861                 goto nla_put_failure;
862
863         if (gnet_stats_finish_copy(&d) < 0)
864                 goto nla_put_failure;
865
866         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
867         return skb->len;
868
869 out_nlmsg_trim:
870 nla_put_failure:
871         nlmsg_trim(skb, b);
872         return -1;
873 }
874
875 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
876 {
877         if (q->flags & TCQ_F_BUILTIN)
878                 return true;
879         if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
880                 return true;
881
882         return false;
883 }
884
885 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
886                         struct nlmsghdr *n, u32 clid,
887                         struct Qdisc *old, struct Qdisc *new)
888 {
889         struct sk_buff *skb;
890         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
891
892         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
893         if (!skb)
894                 return -ENOBUFS;
895
896         if (old && !tc_qdisc_dump_ignore(old, false)) {
897                 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
898                                   0, RTM_DELQDISC) < 0)
899                         goto err_out;
900         }
901         if (new && !tc_qdisc_dump_ignore(new, false)) {
902                 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
903                                   old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
904                         goto err_out;
905         }
906
907         if (skb->len)
908                 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
909                                       n->nlmsg_flags & NLM_F_ECHO);
910
911 err_out:
912         kfree_skb(skb);
913         return -EINVAL;
914 }
915
916 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
917                                struct nlmsghdr *n, u32 clid,
918                                struct Qdisc *old, struct Qdisc *new)
919 {
920         if (new || old)
921                 qdisc_notify(net, skb, n, clid, old, new);
922
923         if (old)
924                 qdisc_destroy(old);
925 }
926
927 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
928  * to device "dev".
929  *
930  * When appropriate send a netlink notification using 'skb'
931  * and "n".
932  *
933  * On success, destroy old qdisc.
934  */
935
/* Two cases are handled:
 *
 * parent == NULL: device-level graft.  "new" is installed on every TX
 *   queue of the device, or only on the ingress queue when either "old"
 *   or "new" carries TCQ_F_INGRESS.  A device that is IFF_UP is
 *   deactivated before the graft and activated again afterwards.  When
 *   "new" provides ops->attach the per-queue loop is skipped and the
 *   attach hook is called once dev->qdisc has been updated.
 *
 * parent != NULL: the graft is delegated to the graft operation of the
 *   parent's class ops, for the class returned by cops->find().
 *
 * Returns 0 on success; -ENOENT when an ingress graft is requested on a
 * device without an ingress queue or when the class is not found;
 * -EOPNOTSUPP when the parent has no class graft operation; otherwise
 * the error returned by cops->graft().
 */
936 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
937                        struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
938                        struct Qdisc *new, struct Qdisc *old,
939                        struct netlink_ext_ack *extack)
940 {
941         struct Qdisc *q = old;
942         struct net *net = dev_net(dev);
943         int err = 0;
944
945         if (parent == NULL) {
946                 unsigned int i, num_q, ingress;
947
948                 ingress = 0;
949                 num_q = dev->num_tx_queues;
950                 if ((q && q->flags & TCQ_F_INGRESS) ||
951                     (new && new->flags & TCQ_F_INGRESS)) {
952                         num_q = 1;
953                         ingress = 1;
954                         if (!dev_ingress_queue(dev)) {
955                                 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
956                                 return -ENOENT;
957                         }
958                 }
959
960                 if (dev->flags & IFF_UP)
961                         dev_deactivate(dev);
962
                    /* Qdiscs with an attach hook are not grafted queue by
                     * queue; their attach() is called further down instead.
                     */
963                 if (new && new->ops->attach)
964                         goto skip;
965
966                 for (i = 0; i < num_q; i++) {
967                         struct netdev_queue *dev_queue = dev_ingress_queue(dev);
968
969                         if (!ingress)
970                                 dev_queue = netdev_get_tx_queue(dev, i);
971
972                         old = dev_graft_qdisc(dev_queue, new);
                            /* One more reference for each additional queue
                             * "new" is installed on.
                             */
973                         if (new && i > 0)
974                                 qdisc_refcount_inc(new);
975
                            /* Egress: the qdisc previously on this queue is
                             * destroyed right away.  For ingress, "old" is
                             * handed to notify_and_destroy() below.
                             */
976                         if (!ingress)
977                                 qdisc_destroy(old);
978                 }
979
980 skip:
981                 if (!ingress) {
982                         notify_and_destroy(net, skb, n, classid,
983                                            dev->qdisc, new);
984                         if (new && !new->ops->attach)
985                                 qdisc_refcount_inc(new);
986                         dev->qdisc = new ? : &noop_qdisc;
987
988                         if (new && new->ops->attach)
989                                 new->ops->attach(new);
990                 } else {
991                         notify_and_destroy(net, skb, n, classid, old, new);
992                 }
993
994                 if (dev->flags & IFF_UP)
995                         dev_activate(dev);
996         } else {
997                 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
998
999                 /* Only support running class lockless if parent is lockless */
1000                 if (new && (new->flags & TCQ_F_NOLOCK) &&
1001                     parent && !(parent->flags & TCQ_F_NOLOCK))
1002                         new->flags &= ~TCQ_F_NOLOCK;
1003
                     /* Stays -EOPNOTSUPP unless the parent offers a class
                      * graft operation.
                      */
1004                 err = -EOPNOTSUPP;
1005                 if (cops && cops->graft) {
1006                         unsigned long cl = cops->find(parent, classid);
1007
1008                         if (cl) {
1009                                 err = cops->graft(parent, cl, new, &old,
1010                                                   extack);
1011                         } else {
1012                                 NL_SET_ERR_MSG(extack, "Specified class not found");
1013                                 err = -ENOENT;
1014                         }
1015                 }
1016                 if (!err)
1017                         notify_and_destroy(net, skb, n, classid, old, new);
1018         }
1019         return err;
1020 }
1021
1022 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1023                                    struct netlink_ext_ack *extack)
1024 {
1025         u32 block_index;
1026
1027         if (tca[TCA_INGRESS_BLOCK]) {
1028                 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1029
1030                 if (!block_index) {
1031                         NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1032                         return -EINVAL;
1033                 }
1034                 if (!sch->ops->ingress_block_set) {
1035                         NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1036                         return -EOPNOTSUPP;
1037                 }
1038                 sch->ops->ingress_block_set(sch, block_index);
1039         }
1040         if (tca[TCA_EGRESS_BLOCK]) {
1041                 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1042
1043                 if (!block_index) {
1044                         NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1045                         return -EINVAL;
1046                 }
1047                 if (!sch->ops->egress_block_set) {
1048                         NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1049                         return -EOPNOTSUPP;
1050                 }
1051                 sch->ops->egress_block_set(sch, block_index);
1052         }
1053         return 0;
1054 }
1055
/* Lock classes applied with lockdep_set_class() in qdisc_create():
 * qdisc_rx_lock for a qdisc created with the TC_H_INGRESS handle,
 * qdisc_tx_lock for every other qdisc.
 */
1056 /* lockdep annotation is needed for ingress; egress gets it only for name */
1057 static struct lock_class_key qdisc_tx_lock;
1058 static struct lock_class_key qdisc_rx_lock;
1059
1060 /*
1061    Allocate and initialize new qdisc.
1062
1063    Parameters are passed via opt.
1064  */
1065
/* Returns the new qdisc, or NULL with the error code stored in *errp.
 *
 * The qdisc kind is taken from tca[TCA_KIND].  When no ops are found for
 * it and CONFIG_MODULES is set, the "sch_<kind>" module is requested with
 * RTNL released; if the ops are available afterwards, -EAGAIN is reported
 * so that the caller replays the whole request.
 *
 * handle == TC_H_INGRESS creates an ingress qdisc (TCQ_F_INGRESS);
 * handle == 0 asks qdisc_alloc_handle() for a handle.
 *
 * Error unwinding:
 *   err_out5 - ops->init() failed: call ops->destroy(), continue at err_out3
 *   err_out4 - failure after a successful init: release the size table,
 *              call ops->destroy(), continue at err_out3
 *   err_out3 - dev_put() the device and free the qdisc
 *   err_out2 - drop the module reference held on ops
 *   err_out  - store the error in *errp and return NULL
 */
1066 static struct Qdisc *qdisc_create(struct net_device *dev,
1067                                   struct netdev_queue *dev_queue,
1068                                   struct Qdisc *p, u32 parent, u32 handle,
1069                                   struct nlattr **tca, int *errp,
1070                                   struct netlink_ext_ack *extack)
1071 {
1072         int err;
1073         struct nlattr *kind = tca[TCA_KIND];
1074         struct Qdisc *sch;
1075         struct Qdisc_ops *ops;
1076         struct qdisc_size_table *stab;
1077
1078         ops = qdisc_lookup_ops(kind);
1079 #ifdef CONFIG_MODULES
1080         if (ops == NULL && kind != NULL) {
1081                 char name[IFNAMSIZ];
1082                 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
1083                         /* We dropped the RTNL semaphore in order to
1084                          * perform the module load.  So, even if we
1085                          * succeeded in loading the module we have to
1086                          * tell the caller to replay the request.  We
1087                          * indicate this using -EAGAIN.
1088                          * We replay the request because the device may
1089                          * go away in the mean time.
1090                          */
1091                         rtnl_unlock();
1092                         request_module("sch_%s", name);
1093                         rtnl_lock();
1094                         ops = qdisc_lookup_ops(kind);
1095                         if (ops != NULL) {
1096                                 /* We will try again qdisc_lookup_ops,
1097                                  * so don't keep a reference.
1098                                  */
1099                                 module_put(ops->owner);
1100                                 err = -EAGAIN;
1101                                 goto err_out;
1102                         }
1103                 }
1104         }
1105 #endif
1106
1107         err = -ENOENT;
1108         if (!ops) {
1109                 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1110                 goto err_out;
1111         }
1112
1113         sch = qdisc_alloc(dev_queue, ops, extack);
1114         if (IS_ERR(sch)) {
1115                 err = PTR_ERR(sch);
1116                 goto err_out2;
1117         }
1118
1119         sch->parent = parent;
1120
1121         if (handle == TC_H_INGRESS) {
1122                 sch->flags |= TCQ_F_INGRESS;
1123                 handle = TC_H_MAKE(TC_H_INGRESS, 0);
1124                 lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
1125         } else {
1126                 if (handle == 0) {
1127                         handle = qdisc_alloc_handle(dev);
1128                         err = -ENOMEM;
1129                         if (handle == 0)
1130                                 goto err_out3;
1131                 }
1132                 lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
1133                 if (!netif_is_multiqueue(dev))
1134                         sch->flags |= TCQ_F_ONETXQUEUE;
1135         }
1136
1137         sch->handle = handle;
1138
1139         /* This exist to keep backward compatible with a userspace
1140          * loophole, what allowed userspace to get IFF_NO_QUEUE
1141          * facility on older kernels by setting tx_queue_len=0 (prior
1142          * to qdisc init), and then forgot to reinit tx_queue_len
1143          * before again attaching a qdisc.
1144          */
1145         if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1146                 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1147                 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1148         }
1149
1150         err = qdisc_block_indexes_set(sch, tca, extack);
1151         if (err)
1152                 goto err_out3;
1153
1154         if (ops->init) {
1155                 err = ops->init(sch, tca[TCA_OPTIONS], extack);
1156                 if (err != 0)
1157                         goto err_out5;
1158         }
1159
1160         if (tca[TCA_STAB]) {
1161                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1162                 if (IS_ERR(stab)) {
1163                         err = PTR_ERR(stab);
1164                         goto err_out4;
1165                 }
1166                 rcu_assign_pointer(sch->stab, stab);
1167         }
1168         if (tca[TCA_RATE]) {
1169                 seqcount_t *running;
1170
1171                 err = -EOPNOTSUPP;
1172                 if (sch->flags & TCQ_F_MQROOT) {
1173                         NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
1174                         goto err_out4;
1175                 }
1176
                     /* Use the root qdisc's "running" seqcount unless this
                      * qdisc is attached at the root, is an ingress qdisc,
                      * or sits under a TCQ_F_MQROOT parent; in those cases
                      * its own seqcount is used.
                      */
1177                 if (sch->parent != TC_H_ROOT &&
1178                     !(sch->flags & TCQ_F_INGRESS) &&
1179                     (!p || !(p->flags & TCQ_F_MQROOT)))
1180                         running = qdisc_root_sleeping_running(sch);
1181                 else
1182                         running = &sch->running;
1183
1184                 err = gen_new_estimator(&sch->bstats,
1185                                         sch->cpu_bstats,
1186                                         &sch->rate_est,
1187                                         NULL,
1188                                         running,
1189                                         tca[TCA_RATE]);
1190                 if (err) {
1191                         NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
1192                         goto err_out4;
1193                 }
1194         }
1195
1196         qdisc_hash_add(sch, false);
1197
1198         return sch;
1199
1200 err_out5:
1201         /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1202         if (ops->destroy)
1203                 ops->destroy(sch);
1204 err_out3:
             /* NOTE(review): presumably balances a device reference taken in
              * qdisc_alloc() - confirm.
              */
1205         dev_put(dev);
1206         qdisc_free(sch);
1207 err_out2:
1208         module_put(ops->owner);
1209 err_out:
1210         *errp = err;
1211         return NULL;
1212
1213 err_out4:
1214         /*
1215          * Any broken qdiscs that would require a ops->reset() here?
1216          * The qdisc was never in action so it shouldn't be necessary.
1217          */
1218         qdisc_put_stab(rtnl_dereference(sch->stab));
1219         if (ops->destroy)
1220                 ops->destroy(sch);
1221         goto err_out3;
1222 }
1223
1224 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1225                         struct netlink_ext_ack *extack)
1226 {
1227         struct qdisc_size_table *ostab, *stab = NULL;
1228         int err = 0;
1229
1230         if (tca[TCA_OPTIONS]) {
1231                 if (!sch->ops->change) {
1232                         NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1233                         return -EINVAL;
1234                 }
1235                 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1236                         NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1237                         return -EOPNOTSUPP;
1238                 }
1239                 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1240                 if (err)
1241                         return err;
1242         }
1243
1244         if (tca[TCA_STAB]) {
1245                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1246                 if (IS_ERR(stab))
1247                         return PTR_ERR(stab);
1248         }
1249
1250         ostab = rtnl_dereference(sch->stab);
1251         rcu_assign_pointer(sch->stab, stab);
1252         qdisc_put_stab(ostab);
1253
1254         if (tca[TCA_RATE]) {
1255                 /* NB: ignores errors from replace_estimator
1256                    because change can't be undone. */
1257                 if (sch->flags & TCQ_F_MQROOT)
1258                         goto out;
1259                 gen_replace_estimator(&sch->bstats,
1260                                       sch->cpu_bstats,
1261                                       &sch->rate_est,
1262                                       NULL,
1263                                       qdisc_root_sleeping_running(sch),
1264                                       tca[TCA_RATE]);
1265         }
1266 out:
1267         return 0;
1268 }
1269
/* State carried through a class walk by check_loop()/check_loop_fn().
 * check_loop_fn() casts its qdisc_walker pointer back to this structure,
 * so 'w' has to stay the first member.
 */
1270 struct check_loop_arg {
1271         struct qdisc_walker     w;      /* walker handed to cl_ops->walk() */
1272         struct Qdisc            *p;     /* qdisc that must not show up as a leaf */
1273         int                     depth;  /* nesting level of the current walk */
1274 };
1275
1276 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1277                          struct qdisc_walker *w);
1278
1279 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1280 {
1281         struct check_loop_arg   arg;
1282
1283         if (q->ops->cl_ops == NULL)
1284                 return 0;
1285
1286         arg.w.stop = arg.w.skip = arg.w.count = 0;
1287         arg.w.fn = check_loop_fn;
1288         arg.depth = depth;
1289         arg.p = p;
1290         q->ops->cl_ops->walk(q, &arg.w);
1291         return arg.w.stop ? -ELOOP : 0;
1292 }
1293
1294 static int
1295 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1296 {
1297         struct Qdisc *leaf;
1298         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1299         struct check_loop_arg *arg = (struct check_loop_arg *)w;
1300
1301         leaf = cops->leaf(q, cl);
1302         if (leaf) {
1303                 if (leaf == arg->p || arg->depth > 7)
1304                         return -ELOOP;
1305                 return check_loop(leaf, arg->p, arg->depth + 1);
1306         }
1307         return 0;
1308 }
1309
1310 /*
1311  * Delete/get qdisc.
1312  */
1313
/* Attribute policy handed to nlmsg_parse() by the qdisc handlers in this
 * file (tc_get_qdisc(), tc_modify_qdisc() and tc_dump_qdisc()).  It
 * declares the netlink type expected for each listed TCA_* attribute;
 * TCA_RATE additionally carries a length of sizeof(struct tc_estimator).
 * NOTE(review): for NLA_BINARY, .len is understood to be an upper bound
 * on the payload size - confirm against the nla_policy documentation.
 */
1314 const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
1315         [TCA_KIND]              = { .type = NLA_STRING },
1316         [TCA_OPTIONS]           = { .type = NLA_NESTED },
1317         [TCA_RATE]              = { .type = NLA_BINARY,
1318                                     .len = sizeof(struct tc_estimator) },
1319         [TCA_STAB]              = { .type = NLA_NESTED },
1320         [TCA_DUMP_INVISIBLE]    = { .type = NLA_FLAG },
1321         [TCA_CHAIN]             = { .type = NLA_U32 },
1322         [TCA_INGRESS_BLOCK]     = { .type = NLA_U32 },
1323         [TCA_EGRESS_BLOCK]      = { .type = NLA_U32 },
1324 };
1325
/* Netlink handler for get and delete requests on a qdisc.
 *
 * Any message type other than RTM_GETQDISC requires CAP_NET_ADMIN in the
 * network namespace's user namespace.
 *
 * Lookup: with tcm_parent set, the qdisc is found through its parent
 * (the device root qdisc for TC_H_ROOT, the ingress queue's qdisc for an
 * ingress major, otherwise the leaf of the given class); a non-zero
 * tcm_handle must then match the qdisc found.  With tcm_parent == 0 the
 * qdisc is looked up by tcm_handle alone.  TCA_KIND, if present, must
 * match the qdisc's ops id.
 *
 * RTM_DELQDISC grafts NULL in place of the qdisc through qdisc_graft();
 * for the other message types a notification describing the qdisc is
 * sent and the result of qdisc_notify() is not propagated.
 *
 * Returns 0 on success or a negative errno.
 */
1326 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1327                         struct netlink_ext_ack *extack)
1328 {
1329         struct net *net = sock_net(skb->sk);
1330         struct tcmsg *tcm = nlmsg_data(n);
1331         struct nlattr *tca[TCA_MAX + 1];
1332         struct net_device *dev;
1333         u32 clid;
1334         struct Qdisc *q = NULL;
1335         struct Qdisc *p = NULL;
1336         int err;
1337
1338         if ((n->nlmsg_type != RTM_GETQDISC) &&
1339             !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1340                 return -EPERM;
1341
1342         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
1343                           extack);
1344         if (err < 0)
1345                 return err;
1346
1347         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1348         if (!dev)
1349                 return -ENODEV;
1350
1351         clid = tcm->tcm_parent;
1352         if (clid) {
1353                 if (clid != TC_H_ROOT) {
1354                         if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1355                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1356                                 if (!p) {
1357                                         NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1358                                         return -ENOENT;
1359                                 }
1360                                 q = qdisc_leaf(p, clid);
1361                         } else if (dev_ingress_queue(dev)) {
1362                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1363                         }
1364                 } else {
1365                         q = dev->qdisc;
1366                 }
1367                 if (!q) {
1368                         NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1369                         return -ENOENT;
1370                 }
1371
1372                 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1373                         NL_SET_ERR_MSG(extack, "Invalid handle");
1374                         return -EINVAL;
1375                 }
1376         } else {
1377                 q = qdisc_lookup(dev, tcm->tcm_handle);
1378                 if (!q) {
1379                         NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1380                         return -ENOENT;
1381                 }
1382         }
1383
1384         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1385                 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1386                 return -EINVAL;
1387         }
1388
1389         if (n->nlmsg_type == RTM_DELQDISC) {
1390                 if (!clid) {
1391                         NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1392                         return -EINVAL;
1393                 }
1394                 if (q->handle == 0) {
1395                         NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1396                         return -ENOENT;
1397                 }
                     /* Deleting means grafting "no qdisc" where q is attached. */
1398                 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1399                 if (err != 0)
1400                         return err;
1401         } else {
1402                 qdisc_notify(net, skb, n, clid, NULL, q);
1403         }
1404         return 0;
1405 }
1406
1407 /*
1408  * Create/change qdisc.
1409  */
1410
/* Netlink handler that creates a qdisc, grafts an existing one to a new
 * place, or changes the parameters of an existing one.  The caller needs
 * CAP_NET_ADMIN.
 *
 * Depending on tcm_parent, tcm_handle and the NLM_F_* flags, the request
 * ends in one of three paths:
 *   - change:         qdisc_change() on the qdisc found, followed by a
 *                     notification;
 *   - create_n_graft: qdisc_create() followed by qdisc_graft(); requires
 *                     NLM_F_CREATE;
 *   - graft:          an existing qdisc found by handle gets its refcount
 *                     raised and is attached with qdisc_graft().
 *
 * When qdisc_create() reports -EAGAIN the request is parsed again and
 * replayed from the "replay" label.
 *
 * Returns 0 on success or a negative errno.
 */
1411 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1412                            struct netlink_ext_ack *extack)
1413 {
1414         struct net *net = sock_net(skb->sk);
1415         struct tcmsg *tcm;
1416         struct nlattr *tca[TCA_MAX + 1];
1417         struct net_device *dev;
1418         u32 clid;
1419         struct Qdisc *q, *p;
1420         int err;
1421
1422         if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1423                 return -EPERM;
1424
1425 replay:
1426         /* Reinit, just in case something touches this. */
1427         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
1428                           extack);
1429         if (err < 0)
1430                 return err;
1431
1432         tcm = nlmsg_data(n);
1433         clid = tcm->tcm_parent;
1434         q = p = NULL;
1435
1436         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1437         if (!dev)
1438                 return -ENODEV;
1439
1440
1441         if (clid) {
1442                 if (clid != TC_H_ROOT) {
1443                         if (clid != TC_H_INGRESS) {
1444                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1445                                 if (!p) {
1446                                         NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1447                                         return -ENOENT;
1448                                 }
1449                                 q = qdisc_leaf(p, clid);
1450                         } else if (dev_ingress_queue_create(dev)) {
1451                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1452                         }
1453                 } else {
1454                         q = dev->qdisc;
1455                 }
1456
1457                 /* It may be default qdisc, ignore it */
1458                 if (q && q->handle == 0)
1459                         q = NULL;
1460
1461                 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1462                         if (tcm->tcm_handle) {
1463                                 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1464                                         NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1465                                         return -EEXIST;
1466                                 }
1467                                 if (TC_H_MIN(tcm->tcm_handle)) {
1468                                         NL_SET_ERR_MSG(extack, "Invalid minor handle");
1469                                         return -EINVAL;
1470                                 }
1471                                 q = qdisc_lookup(dev, tcm->tcm_handle);
1472                                 if (!q)
1473                                         goto create_n_graft;
1474                                 if (n->nlmsg_flags & NLM_F_EXCL) {
1475                                         NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1476                                         return -EEXIST;
1477                                 }
1478                                 if (tca[TCA_KIND] &&
1479                                     nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1480                                         NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1481                                         return -EINVAL;
1482                                 }
1483                                 if (q == p ||
1484                                     (p && check_loop(q, p, 0))) {
1485                                         NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1486                                         return -ELOOP;
1487                                 }
                                     /* Existing qdisc is about to be grafted
                                      * to a new place: take a reference first.
                                      */
1488                                 qdisc_refcount_inc(q);
1489                                 goto graft;
1490                         } else {
1491                                 if (!q)
1492                                         goto create_n_graft;
1493
1494                                 /* This magic test requires explanation.
1495                                  *
1496                                  *   We know, that some child q is already
1497                                  *   attached to this parent and have choice:
1498                                  *   either to change it or to create/graft new one.
1499                                  *
1500                                  *   1. We are allowed to create/graft only
1501                                  *   if CREATE and REPLACE flags are set.
1502                                  *
1503                                  *   2. If EXCL is set, requestor wanted to say,
1504                                  *   that qdisc tcm_handle is not expected
1505                                  *   to exist, so that we choose create/graft too.
1506                                  *
1507                                  *   3. The last case is when no flags are set.
1508                                  *   Alas, it is sort of hole in API, we
1509                                  *   cannot decide what to do unambiguously.
1510                                  *   For now we select create/graft, if
1511                                  *   user gave KIND, which does not match existing.
1512                                  */
1513                                 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1514                                     (n->nlmsg_flags & NLM_F_REPLACE) &&
1515                                     ((n->nlmsg_flags & NLM_F_EXCL) ||
1516                                      (tca[TCA_KIND] &&
1517                                       nla_strcmp(tca[TCA_KIND], q->ops->id))))
1518                                         goto create_n_graft;
1519                         }
1520                 }
1521         } else {
1522                 if (!tcm->tcm_handle) {
1523                         NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1524                         return -EINVAL;
1525                 }
1526                 q = qdisc_lookup(dev, tcm->tcm_handle);
1527         }
1528
1529         /* Change qdisc parameters */
1530         if (!q) {
1531                 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1532                 return -ENOENT;
1533         }
1534         if (n->nlmsg_flags & NLM_F_EXCL) {
1535                 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1536                 return -EEXIST;
1537         }
1538         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1539                 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1540                 return -EINVAL;
1541         }
1542         err = qdisc_change(q, tca, extack);
1543         if (err == 0)
1544                 qdisc_notify(net, skb, n, clid, NULL, q);
1545         return err;
1546
1547 create_n_graft:
1548         if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1549                 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1550                 return -ENOENT;
1551         }
1552         if (clid == TC_H_INGRESS) {
1553                 if (dev_ingress_queue(dev)) {
1554                         q = qdisc_create(dev, dev_ingress_queue(dev), p,
1555                                          tcm->tcm_parent, tcm->tcm_parent,
1556                                          tca, &err, extack);
1557                 } else {
1558                         NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1559                         err = -ENOENT;
1560                 }
1561         } else {
1562                 struct netdev_queue *dev_queue;
1563
                     /* Pick the TX queue for the new qdisc: the parent's
                      * select_queue() choice if it has one, else the
                      * parent's own queue, else TX queue 0 of the device.
                      */
1564                 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1565                         dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1566                 else if (p)
1567                         dev_queue = p->dev_queue;
1568                 else
1569                         dev_queue = netdev_get_tx_queue(dev, 0);
1570
1571                 q = qdisc_create(dev, dev_queue, p,
1572                                  tcm->tcm_parent, tcm->tcm_handle,
1573                                  tca, &err, extack);
1574         }
1575         if (q == NULL) {
1576                 if (err == -EAGAIN)
1577                         goto replay;
1578                 return err;
1579         }
1580
1581 graft:
1582         err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1583         if (err) {
             /* Graft failed: dispose of q, which was either created
              * above or had its refcount raised before the jump here.
              */
1584                 if (q)
1585                         qdisc_destroy(q);
1586                 return err;
1587         }
1588
1589         return 0;
1590 }
1591
1592 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1593                               struct netlink_callback *cb,
1594                               int *q_idx_p, int s_q_idx, bool recur,
1595                               bool dump_invisible)
1596 {
1597         int ret = 0, q_idx = *q_idx_p;
1598         struct Qdisc *q;
1599         int b;
1600
1601         if (!root)
1602                 return 0;
1603
1604         q = root;
1605         if (q_idx < s_q_idx) {
1606                 q_idx++;
1607         } else {
1608                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1609                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1610                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1611                                   RTM_NEWQDISC) <= 0)
1612                         goto done;
1613                 q_idx++;
1614         }
1615
1616         /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1617          * itself has already been dumped.
1618          *
1619          * If we've already dumped the top-level (ingress) qdisc above and the global
1620          * qdisc hashtable, we don't want to hit it again
1621          */
1622         if (!qdisc_dev(root) || !recur)
1623                 goto out;
1624
1625         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1626                 if (q_idx < s_q_idx) {
1627                         q_idx++;
1628                         continue;
1629                 }
1630                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1631                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1632                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1633                                   RTM_NEWQDISC) <= 0)
1634                         goto done;
1635                 q_idx++;
1636         }
1637
1638 out:
1639         *q_idx_p = q_idx;
1640         return ret;
1641 done:
1642         ret = -1;
1643         goto out;
1644 }
1645
1646 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1647 {
1648         struct net *net = sock_net(skb->sk);
1649         int idx, q_idx;
1650         int s_idx, s_q_idx;
1651         struct net_device *dev;
1652         const struct nlmsghdr *nlh = cb->nlh;
1653         struct nlattr *tca[TCA_MAX + 1];
1654         int err;
1655
1656         s_idx = cb->args[0];
1657         s_q_idx = q_idx = cb->args[1];
1658
1659         idx = 0;
1660         ASSERT_RTNL();
1661
1662         err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1663                           rtm_tca_policy, NULL);
1664         if (err < 0)
1665                 return err;
1666
1667         for_each_netdev(net, dev) {
1668                 struct netdev_queue *dev_queue;
1669
1670                 if (idx < s_idx)
1671                         goto cont;
1672                 if (idx > s_idx)
1673                         s_q_idx = 0;
1674                 q_idx = 0;
1675
1676                 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1677                                        true, tca[TCA_DUMP_INVISIBLE]) < 0)
1678                         goto done;
1679
1680                 dev_queue = dev_ingress_queue(dev);
1681                 if (dev_queue &&
1682                     tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1683                                        &q_idx, s_q_idx, false,
1684                                        tca[TCA_DUMP_INVISIBLE]) < 0)
1685                         goto done;
1686
1687 cont:
1688                 idx++;
1689         }
1690
1691 done:
1692         cb->args[0] = idx;
1693         cb->args[1] = q_idx;
1694
1695         return skb->len;
1696 }
1697
1698
1699
1700 /************************************************
1701  *      Traffic classes manipulation.           *
1702  ************************************************/
1703
1704 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1705                           unsigned long cl,
1706                           u32 portid, u32 seq, u16 flags, int event)
1707 {
1708         struct tcmsg *tcm;
1709         struct nlmsghdr  *nlh;
1710         unsigned char *b = skb_tail_pointer(skb);
1711         struct gnet_dump d;
1712         const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1713
1714         cond_resched();
1715         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1716         if (!nlh)
1717                 goto out_nlmsg_trim;
1718         tcm = nlmsg_data(nlh);
1719         tcm->tcm_family = AF_UNSPEC;
1720         tcm->tcm__pad1 = 0;
1721         tcm->tcm__pad2 = 0;
1722         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1723         tcm->tcm_parent = q->handle;
1724         tcm->tcm_handle = q->handle;
1725         tcm->tcm_info = 0;
1726         if (nla_put_string(skb, TCA_KIND, q->ops->id))
1727                 goto nla_put_failure;
1728         if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1729                 goto nla_put_failure;
1730
1731         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1732                                          NULL, &d, TCA_PAD) < 0)
1733                 goto nla_put_failure;
1734
1735         if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1736                 goto nla_put_failure;
1737
1738         if (gnet_stats_finish_copy(&d) < 0)
1739                 goto nla_put_failure;
1740
1741         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1742         return skb->len;
1743
1744 out_nlmsg_trim:
1745 nla_put_failure:
1746         nlmsg_trim(skb, b);
1747         return -1;
1748 }
1749
1750 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1751                          struct nlmsghdr *n, struct Qdisc *q,
1752                          unsigned long cl, int event)
1753 {
1754         struct sk_buff *skb;
1755         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1756
1757         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1758         if (!skb)
1759                 return -ENOBUFS;
1760
1761         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1762                 kfree_skb(skb);
1763                 return -EINVAL;
1764         }
1765
1766         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1767                               n->nlmsg_flags & NLM_F_ECHO);
1768 }
1769
1770 static int tclass_del_notify(struct net *net,
1771                              const struct Qdisc_class_ops *cops,
1772                              struct sk_buff *oskb, struct nlmsghdr *n,
1773                              struct Qdisc *q, unsigned long cl)
1774 {
1775         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1776         struct sk_buff *skb;
1777         int err = 0;
1778
1779         if (!cops->delete)
1780                 return -EOPNOTSUPP;
1781
1782         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1783         if (!skb)
1784                 return -ENOBUFS;
1785
1786         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1787                            RTM_DELTCLASS) < 0) {
1788                 kfree_skb(skb);
1789                 return -EINVAL;
1790         }
1791
1792         err = cops->delete(q, cl);
1793         if (err) {
1794                 kfree_skb(skb);
1795                 return err;
1796         }
1797
1798         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1799                               n->nlmsg_flags & NLM_F_ECHO);
1800 }
1801
1802 #ifdef CONFIG_NET_CLS
1803
1804 struct tcf_bind_args {
1805         struct tcf_walker w;
1806         u32 classid;
1807         unsigned long cl;
1808 };
1809
1810 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1811 {
1812         struct tcf_bind_args *a = (void *)arg;
1813
1814         if (tp->ops->bind_class) {
1815                 struct Qdisc *q = tcf_block_q(tp->chain->block);
1816
1817                 sch_tree_lock(q);
1818                 tp->ops->bind_class(n, a->classid, a->cl);
1819                 sch_tree_unlock(q);
1820         }
1821         return 0;
1822 }
1823
1824 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1825                            unsigned long new_cl)
1826 {
1827         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1828         struct tcf_block *block;
1829         struct tcf_chain *chain;
1830         unsigned long cl;
1831
1832         cl = cops->find(q, portid);
1833         if (!cl)
1834                 return;
1835         block = cops->tcf_block(q, cl, NULL);
1836         if (!block)
1837                 return;
1838         list_for_each_entry(chain, &block->chain_list, list) {
1839                 struct tcf_proto *tp;
1840
1841                 for (tp = rtnl_dereference(chain->filter_chain);
1842                      tp; tp = rtnl_dereference(tp->next)) {
1843                         struct tcf_bind_args arg = {};
1844
1845                         arg.w.fn = tcf_node_bind;
1846                         arg.classid = clid;
1847                         arg.cl = new_cl;
1848                         tp->ops->walk(tp, &arg.w);
1849                 }
1850         }
1851 }
1852
1853 #else
1854
1855 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1856                            unsigned long new_cl)
1857 {
1858 }
1859
1860 #endif
1861
1862 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1863                          struct netlink_ext_ack *extack)
1864 {
1865         struct net *net = sock_net(skb->sk);
1866         struct tcmsg *tcm = nlmsg_data(n);
1867         struct nlattr *tca[TCA_MAX + 1];
1868         struct net_device *dev;
1869         struct Qdisc *q = NULL;
1870         const struct Qdisc_class_ops *cops;
1871         unsigned long cl = 0;
1872         unsigned long new_cl;
1873         u32 portid;
1874         u32 clid;
1875         u32 qid;
1876         int err;
1877
1878         if ((n->nlmsg_type != RTM_GETTCLASS) &&
1879             !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1880                 return -EPERM;
1881
1882         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
1883                           extack);
1884         if (err < 0)
1885                 return err;
1886
1887         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1888         if (!dev)
1889                 return -ENODEV;
1890
1891         /*
1892            parent == TC_H_UNSPEC - unspecified parent.
1893            parent == TC_H_ROOT   - class is root, which has no parent.
1894            parent == X:0         - parent is root class.
1895            parent == X:Y         - parent is a node in hierarchy.
1896            parent == 0:Y         - parent is X:Y, where X:0 is qdisc.
1897
1898            handle == 0:0         - generate handle from kernel pool.
1899            handle == 0:Y         - class is X:Y, where X:0 is qdisc.
1900            handle == X:Y         - clear.
1901            handle == X:0         - root class.
1902          */
1903
1904         /* Step 1. Determine qdisc handle X:0 */
1905
1906         portid = tcm->tcm_parent;
1907         clid = tcm->tcm_handle;
1908         qid = TC_H_MAJ(clid);
1909
1910         if (portid != TC_H_ROOT) {
1911                 u32 qid1 = TC_H_MAJ(portid);
1912
1913                 if (qid && qid1) {
1914                         /* If both majors are known, they must be identical. */
1915                         if (qid != qid1)
1916                                 return -EINVAL;
1917                 } else if (qid1) {
1918                         qid = qid1;
1919                 } else if (qid == 0)
1920                         qid = dev->qdisc->handle;
1921
1922                 /* Now qid is genuine qdisc handle consistent
1923                  * both with parent and child.
1924                  *
1925                  * TC_H_MAJ(portid) still may be unspecified, complete it now.
1926                  */
1927                 if (portid)
1928                         portid = TC_H_MAKE(qid, portid);
1929         } else {
1930                 if (qid == 0)
1931                         qid = dev->qdisc->handle;
1932         }
1933
1934         /* OK. Locate qdisc */
1935         q = qdisc_lookup(dev, qid);
1936         if (!q)
1937                 return -ENOENT;
1938
1939         /* An check that it supports classes */
1940         cops = q->ops->cl_ops;
1941         if (cops == NULL)
1942                 return -EINVAL;
1943
1944         /* Now try to get class */
1945         if (clid == 0) {
1946                 if (portid == TC_H_ROOT)
1947                         clid = qid;
1948         } else
1949                 clid = TC_H_MAKE(qid, clid);
1950
1951         if (clid)
1952                 cl = cops->find(q, clid);
1953
1954         if (cl == 0) {
1955                 err = -ENOENT;
1956                 if (n->nlmsg_type != RTM_NEWTCLASS ||
1957                     !(n->nlmsg_flags & NLM_F_CREATE))
1958                         goto out;
1959         } else {
1960                 switch (n->nlmsg_type) {
1961                 case RTM_NEWTCLASS:
1962                         err = -EEXIST;
1963                         if (n->nlmsg_flags & NLM_F_EXCL)
1964                                 goto out;
1965                         break;
1966                 case RTM_DELTCLASS:
1967                         err = tclass_del_notify(net, cops, skb, n, q, cl);
1968                         /* Unbind the class with flilters with 0 */
1969                         tc_bind_tclass(q, portid, clid, 0);
1970                         goto out;
1971                 case RTM_GETTCLASS:
1972                         err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1973                         goto out;
1974                 default:
1975                         err = -EINVAL;
1976                         goto out;
1977                 }
1978         }
1979
1980         if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1981                 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
1982                 return -EOPNOTSUPP;
1983         }
1984
1985         new_cl = cl;
1986         err = -EOPNOTSUPP;
1987         if (cops->change)
1988                 err = cops->change(q, clid, portid, tca, &new_cl, extack);
1989         if (err == 0) {
1990                 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1991                 /* We just create a new class, need to do reverse binding. */
1992                 if (cl != new_cl)
1993                         tc_bind_tclass(q, portid, clid, new_cl);
1994         }
1995 out:
1996         return err;
1997 }
1998
1999 struct qdisc_dump_args {
2000         struct qdisc_walker     w;
2001         struct sk_buff          *skb;
2002         struct netlink_callback *cb;
2003 };
2004
2005 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2006                             struct qdisc_walker *arg)
2007 {
2008         struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2009
2010         return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2011                               a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2012                               RTM_NEWTCLASS);
2013 }
2014
2015 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2016                                 struct tcmsg *tcm, struct netlink_callback *cb,
2017                                 int *t_p, int s_t)
2018 {
2019         struct qdisc_dump_args arg;
2020
2021         if (tc_qdisc_dump_ignore(q, false) ||
2022             *t_p < s_t || !q->ops->cl_ops ||
2023             (tcm->tcm_parent &&
2024              TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2025                 (*t_p)++;
2026                 return 0;
2027         }
2028         if (*t_p > s_t)
2029                 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2030         arg.w.fn = qdisc_class_dump;
2031         arg.skb = skb;
2032         arg.cb = cb;
2033         arg.w.stop  = 0;
2034         arg.w.skip = cb->args[1];
2035         arg.w.count = 0;
2036         q->ops->cl_ops->walk(q, &arg.w);
2037         cb->args[1] = arg.w.count;
2038         if (arg.w.stop)
2039                 return -1;
2040         (*t_p)++;
2041         return 0;
2042 }
2043
2044 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2045                                struct tcmsg *tcm, struct netlink_callback *cb,
2046                                int *t_p, int s_t)
2047 {
2048         struct Qdisc *q;
2049         int b;
2050
2051         if (!root)
2052                 return 0;
2053
2054         if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2055                 return -1;
2056
2057         if (!qdisc_dev(root))
2058                 return 0;
2059
2060         if (tcm->tcm_parent) {
2061                 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2062                 if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2063                         return -1;
2064                 return 0;
2065         }
2066         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2067                 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2068                         return -1;
2069         }
2070
2071         return 0;
2072 }
2073
2074 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2075 {
2076         struct tcmsg *tcm = nlmsg_data(cb->nlh);
2077         struct net *net = sock_net(skb->sk);
2078         struct netdev_queue *dev_queue;
2079         struct net_device *dev;
2080         int t, s_t;
2081
2082         if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2083                 return 0;
2084         dev = dev_get_by_index(net, tcm->tcm_ifindex);
2085         if (!dev)
2086                 return 0;
2087
2088         s_t = cb->args[0];
2089         t = 0;
2090
2091         if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
2092                 goto done;
2093
2094         dev_queue = dev_ingress_queue(dev);
2095         if (dev_queue &&
2096             tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
2097                                 &t, s_t) < 0)
2098                 goto done;
2099
2100 done:
2101         cb->args[0] = t;
2102
2103         dev_put(dev);
2104         return skb->len;
2105 }
2106
2107 #ifdef CONFIG_PROC_FS
2108 static int psched_show(struct seq_file *seq, void *v)
2109 {
2110         seq_printf(seq, "%08x %08x %08x %08x\n",
2111                    (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2112                    1000000,
2113                    (u32)NSEC_PER_SEC / hrtimer_resolution);
2114
2115         return 0;
2116 }
2117
2118 static int __net_init psched_net_init(struct net *net)
2119 {
2120         struct proc_dir_entry *e;
2121
2122         e = proc_create_single("psched", 0, net->proc_net, psched_show);
2123         if (e == NULL)
2124                 return -ENOMEM;
2125
2126         return 0;
2127 }
2128
2129 static void __net_exit psched_net_exit(struct net *net)
2130 {
2131         remove_proc_entry("psched", net->proc_net);
2132 }
2133 #else
2134 static int __net_init psched_net_init(struct net *net)
2135 {
2136         return 0;
2137 }
2138
2139 static void __net_exit psched_net_exit(struct net *net)
2140 {
2141 }
2142 #endif
2143
2144 static struct pernet_operations psched_net_ops = {
2145         .init = psched_net_init,
2146         .exit = psched_net_exit,
2147 };
2148
2149 static int __init pktsched_init(void)
2150 {
2151         int err;
2152
2153         err = register_pernet_subsys(&psched_net_ops);
2154         if (err) {
2155                 pr_err("pktsched_init: "
2156                        "cannot initialize per netns operations\n");
2157                 return err;
2158         }
2159
2160         register_qdisc(&pfifo_fast_ops);
2161         register_qdisc(&pfifo_qdisc_ops);
2162         register_qdisc(&bfifo_qdisc_ops);
2163         register_qdisc(&pfifo_head_drop_qdisc_ops);
2164         register_qdisc(&mq_qdisc_ops);
2165         register_qdisc(&noqueue_qdisc_ops);
2166
2167         rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2168         rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2169         rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2170                       0);
2171         rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2172         rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2173         rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2174                       0);
2175
2176         return 0;
2177 }
2178
2179 subsys_initcall(pktsched_init);